{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Geneformer Fine-Tuning for Classification of Dosage-Sensitive vs. -Insensitive Transcription Factors (TFs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "GPU_NUMBER = [0]\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \",\".join([str(s) for s in GPU_NUMBER])\n",
    "os.environ[\"NCCL_DEBUG\"] = \"INFO\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
    "import datetime\n",
    "import subprocess\n",
    "import math\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from datasets import load_from_disk\n",
    "from sklearn import preprocessing\n",
    "from sklearn.metrics import accuracy_score, auc, confusion_matrix, ConfusionMatrixDisplay, roc_curve\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "import torch\n",
    "from transformers import BertForTokenClassification\n",
    "from transformers import Trainer\n",
    "from transformers.training_args import TrainingArguments\n",
    "from tqdm.notebook import tqdm\n",
    "\n",
    "from geneformer import DataCollatorForGeneClassification\n",
    "from geneformer.pretrainer import token_dictionary"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Gene Attribute Information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# table of corresponding Ensembl IDs, gene names, and gene types (e.g. coding, miRNA, etc.)\n",
    "gene_info = pd.read_csv(\"/path/to/gene_info_table.csv\", index_col=0)\n",
    "\n",
    "# create dictionaries for corresponding attributes\n",
    "gene_id_type_dict = dict(zip(gene_info[\"ensembl_id\"],gene_info[\"gene_type\"]))\n",
    "gene_name_id_dict = dict(zip(gene_info[\"gene_name\"],gene_info[\"ensembl_id\"]))\n",
    "gene_id_name_dict = {v: k for k,v in gene_name_id_dict.items()}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Training Data and Class Labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# function for preparing targets and labels\n",
    "def prep_inputs(genegroup1, genegroup2, id_type):\n",
    "    if id_type == \"gene_name\":\n",
    "        targets1 = [gene_name_id_dict[gene] for gene in genegroup1 if gene_name_id_dict.get(gene) in token_dictionary]\n",
    "        targets2 = [gene_name_id_dict[gene] for gene in genegroup2 if gene_name_id_dict.get(gene) in token_dictionary]\n",
    "    elif id_type == \"ensembl_id\":\n",
    "        targets1 = [gene for gene in genegroup1 if gene in token_dictionary]\n",
    "        targets2 = [gene for gene in genegroup2 if gene in token_dictionary]\n",
    "            \n",
    "    targets1_id = [token_dictionary[gene] for gene in targets1]\n",
    "    targets2_id = [token_dictionary[gene] for gene in targets2]\n",
    "    \n",
    "    targets = np.array(targets1_id + targets2_id)\n",
    "    labels = np.array([0]*len(targets1_id) + [1]*len(targets2_id))\n",
    "    nsplits = min(5, min(len(targets1_id), len(targets2_id))-1)\n",
    "    assert nsplits > 2\n",
    "    print(f\"# targets1: {len(targets1_id)}\\n# targets2: {len(targets2_id)}\\n# splits: {nsplits}\")\n",
    "    return targets, labels, nsplits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# preparing targets and labels for dosage sensitive vs insensitive TFs\n",
    "dosage_tfs = pd.read_csv(\"/path/to/dosage_sens_tf_labels.csv\", header=0)\n",
    "sensitive = dosage_tfs[\"dosage_sensitive\"].dropna()\n",
    "insensitive = dosage_tfs[\"dosage_insensitive\"].dropna()\n",
    "targets, labels, nsplits = prep_inputs(sensitive, insensitive, \"ensembl_id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load training dataset\n",
    "train_dataset=load_from_disk(\"/path/to/gene_train_data.dataset\")\n",
    "shuffled_train_dataset = train_dataset.shuffle(seed=42)\n",
    "subsampled_train_dataset = shuffled_train_dataset.select([i for i in range(50_000)])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define Functions for Training and Cross-Validating Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def preprocess_classifier_batch(cell_batch, max_len):\n",
    "    if max_len == None:\n",
    "        max_len = max([len(i) for i in cell_batch[\"input_ids\"]])\n",
    "    def pad_label_example(example):\n",
    "        example[\"labels\"] = np.pad(example[\"labels\"], \n",
    "                                   (0, max_len-len(example[\"input_ids\"])), \n",
    "                                   mode='constant', constant_values=-100)\n",
    "        example[\"input_ids\"] = np.pad(example[\"input_ids\"], \n",
    "                                      (0, max_len-len(example[\"input_ids\"])), \n",
    "                                      mode='constant', constant_values=token_dictionary.get(\"<pad>\"))\n",
    "        example[\"attention_mask\"] = (example[\"input_ids\"] != token_dictionary.get(\"<pad>\")).astype(int)\n",
    "        return example\n",
    "    padded_batch = cell_batch.map(pad_label_example)\n",
    "    return padded_batch\n",
    "\n",
    "# forward batch size is batch size for model inference (e.g. 200)\n",
    "def classifier_predict(model, evalset, forward_batch_size, mean_fpr):\n",
    "    predict_logits = []\n",
    "    predict_labels = []\n",
    "    model.eval()\n",
    "    \n",
    "    # ensure there is at least 2 examples in each batch to avoid incorrect tensor dims\n",
    "    evalset_len = len(evalset)\n",
    "    max_divisible = find_largest_div(evalset_len, forward_batch_size)\n",
    "    if len(evalset) - max_divisible == 1:\n",
    "        evalset_len = max_divisible\n",
    "    \n",
    "    max_evalset_len = max(evalset.select([i for i in range(evalset_len)])[\"length\"])\n",
    "    \n",
    "    for i in range(0, evalset_len, forward_batch_size):\n",
    "        max_range = min(i+forward_batch_size, evalset_len)\n",
    "        batch_evalset = evalset.select([i for i in range(i, max_range)])\n",
    "        padded_batch = preprocess_classifier_batch(batch_evalset, max_evalset_len)\n",
    "        padded_batch.set_format(type=\"torch\")\n",
    "        \n",
    "        input_data_batch = padded_batch[\"input_ids\"]\n",
    "        attn_msk_batch = padded_batch[\"attention_mask\"]\n",
    "        label_batch = padded_batch[\"labels\"]\n",
    "        with torch.no_grad():\n",
    "            outputs = model(\n",
    "                input_ids = input_data_batch.to(\"cuda\"), \n",
    "                attention_mask = attn_msk_batch.to(\"cuda\"), \n",
    "                labels = label_batch.to(\"cuda\"), \n",
    "            )\n",
    "            predict_logits += [torch.squeeze(outputs.logits.to(\"cpu\"))]\n",
    "            predict_labels += [torch.squeeze(label_batch.to(\"cpu\"))]\n",
    "            \n",
    "    logits_by_cell = torch.cat(predict_logits)\n",
    "    all_logits = logits_by_cell.reshape(-1, logits_by_cell.shape[2])\n",
    "    labels_by_cell = torch.cat(predict_labels)\n",
    "    all_labels = torch.flatten(labels_by_cell)\n",
    "    logit_label_paired = [item for item in list(zip(all_logits.tolist(), all_labels.tolist())) if item[1]!=-100]\n",
    "    y_pred = [vote(item[0]) for item in logit_label_paired]\n",
    "    y_true = [item[1] for item in logit_label_paired]\n",
    "    logits_list = [item[0] for item in logit_label_paired]\n",
    "    # probability of class 1\n",
    "    y_score = [py_softmax(item)[1] for item in logits_list]\n",
    "    conf_mat = confusion_matrix(y_true, y_pred)\n",
    "    fpr, tpr, _ = roc_curve(y_true, y_score)\n",
    "    # plot roc_curve for this split\n",
    "    plt.plot(fpr, tpr)\n",
    "    plt.xlim([0.0, 1.0])\n",
    "    plt.ylim([0.0, 1.05])\n",
    "    plt.xlabel('False Positive Rate')\n",
    "    plt.ylabel('True Positive Rate')\n",
    "    plt.title('ROC')\n",
    "    plt.show()\n",
    "    # interpolate to graph\n",
    "    interp_tpr = np.interp(mean_fpr, fpr, tpr)\n",
    "    interp_tpr[0] = 0.0\n",
    "    return fpr, tpr, interp_tpr, conf_mat \n",
    "\n",
    "def vote(logit_pair):\n",
    "    a, b = logit_pair\n",
    "    if a > b:\n",
    "        return 0\n",
    "    elif b > a:\n",
    "        return 1\n",
    "    elif a == b:\n",
    "        return \"tie\"\n",
    "    \n",
    "def py_softmax(vector):\n",
    "\te = np.exp(vector)\n",
    "\treturn e / e.sum()\n",
    "    \n",
    "# get cross-validated mean and sd metrics\n",
    "def get_cross_valid_metrics(all_tpr, all_roc_auc, all_tpr_wt):\n",
    "    wts = [count/sum(all_tpr_wt) for count in all_tpr_wt]\n",
    "    print(wts)\n",
    "    all_weighted_tpr = [a*b for a,b in zip(all_tpr, wts)]\n",
    "    mean_tpr = np.sum(all_weighted_tpr, axis=0)\n",
    "    mean_tpr[-1] = 1.0\n",
    "    all_weighted_roc_auc = [a*b for a,b in zip(all_roc_auc, wts)]\n",
    "    roc_auc = np.sum(all_weighted_roc_auc)\n",
    "    roc_auc_sd = math.sqrt(np.average((all_roc_auc-roc_auc)**2, weights=wts))\n",
    "    return mean_tpr, roc_auc, roc_auc_sd\n",
    "\n",
    "# Function to find the largest number smaller\n",
    "# than or equal to N that is divisible by k\n",
    "def find_largest_div(N, K):\n",
    "    rem = N % K\n",
    "    if(rem == 0):\n",
    "        return N\n",
    "    else:\n",
    "        return N - rem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# cross-validate gene classifier\n",
    "def cross_validate(data, targets, labels, nsplits, subsample_size, training_args, freeze_layers, output_dir, num_proc):\n",
    "    # check if output directory already written to\n",
    "    # ensure not overwriting previously saved model\n",
    "    model_dir_test = os.path.join(output_dir, \"ksplit0/models/pytorch_model.bin\")\n",
    "    if os.path.isfile(model_dir_test) == True:\n",
    "        raise Exception(\"Model already saved to this directory.\")\n",
    "    \n",
    "    # initiate eval metrics to return\n",
    "    num_classes = len(set(labels))\n",
    "    mean_fpr = np.linspace(0, 1, 100)\n",
    "    all_tpr = []\n",
    "    all_roc_auc = []\n",
    "    all_tpr_wt = []\n",
    "    label_dicts = []\n",
    "    confusion = np.zeros((num_classes,num_classes))\n",
    "    \n",
    "    # set up cross-validation splits\n",
    "    skf = StratifiedKFold(n_splits=nsplits, random_state=0, shuffle=True)\n",
    "    # train and evaluate\n",
    "    iteration_num = 0\n",
    "    for train_index, eval_index in tqdm(skf.split(targets, labels)):\n",
    "        if len(labels) > 500:\n",
    "            print(\"early stopping activated due to large # of training examples\")\n",
    "            nsplits = 3\n",
    "            if iteration_num == 3:\n",
    "                break\n",
    "        print(f\"****** Crossval split: {iteration_num}/{nsplits-1} ******\\n\")\n",
    "        # generate cross-validation splits\n",
    "        targets_train, targets_eval = targets[train_index], targets[eval_index]\n",
    "        labels_train, labels_eval = labels[train_index], labels[eval_index]\n",
    "        label_dict_train = dict(zip(targets_train, labels_train))\n",
    "        label_dict_eval = dict(zip(targets_eval, labels_eval))\n",
    "        label_dicts += (iteration_num, targets_train, targets_eval, labels_train, labels_eval)\n",
    "        \n",
    "        # function to filter by whether contains train or eval labels\n",
    "        def if_contains_train_label(example):\n",
    "            a = label_dict_train.keys()\n",
    "            b = example['input_ids']\n",
    "            return not set(a).isdisjoint(b)\n",
    "\n",
    "        def if_contains_eval_label(example):\n",
    "            a = label_dict_eval.keys()\n",
    "            b = example['input_ids']\n",
    "            return not set(a).isdisjoint(b)\n",
    "        \n",
    "        # filter dataset for examples containing classes for this split\n",
    "        print(f\"Filtering training data\")\n",
    "        trainset = data.filter(if_contains_train_label, num_proc=num_proc)\n",
    "        print(f\"Filtered {round((1-len(trainset)/len(data))*100)}%; {len(trainset)} remain\\n\")\n",
    "        print(f\"Filtering evalation data\")\n",
    "        evalset = data.filter(if_contains_eval_label, num_proc=num_proc)\n",
    "        print(f\"Filtered {round((1-len(evalset)/len(data))*100)}%; {len(evalset)} remain\\n\")\n",
    "\n",
    "        # minimize to smaller training sample\n",
    "        training_size = min(subsample_size, len(trainset))\n",
    "        trainset_min = trainset.select([i for i in range(training_size)])\n",
    "        eval_size = min(training_size, len(evalset))\n",
    "        half_training_size = round(eval_size/2)\n",
    "        evalset_train_min = evalset.select([i for i in range(half_training_size)])\n",
    "        evalset_oos_min = evalset.select([i for i in range(half_training_size, eval_size)])\n",
    "        \n",
    "        # label conversion functions\n",
    "        def generate_train_labels(example):\n",
    "            example[\"labels\"] = [label_dict_train.get(token_id, -100) for token_id in example[\"input_ids\"]]\n",
    "            return example\n",
    "\n",
    "        def generate_eval_labels(example):\n",
    "            example[\"labels\"] = [label_dict_eval.get(token_id, -100) for token_id in example[\"input_ids\"]]\n",
    "            return example\n",
    "        \n",
    "        # label datasets \n",
    "        print(f\"Labeling training data\")\n",
    "        trainset_labeled = trainset_min.map(generate_train_labels)\n",
    "        print(f\"Labeling evaluation data\")\n",
    "        evalset_train_labeled = evalset_train_min.map(generate_eval_labels)\n",
    "        print(f\"Labeling evaluation OOS data\")\n",
    "        evalset_oos_labeled = evalset_oos_min.map(generate_eval_labels)\n",
    "        \n",
    "        # create output directories\n",
    "        ksplit_output_dir = os.path.join(output_dir, f\"ksplit{iteration_num}\")\n",
    "        ksplit_model_dir = os.path.join(ksplit_output_dir, \"models/\") \n",
    "        \n",
    "        # ensure not overwriting previously saved model\n",
    "        model_output_file = os.path.join(ksplit_model_dir, \"pytorch_model.bin\")\n",
    "        if os.path.isfile(model_output_file) == True:\n",
    "            raise Exception(\"Model already saved to this directory.\")\n",
    "\n",
    "        # make training and model output directories\n",
    "        subprocess.call(f'mkdir {ksplit_output_dir}', shell=True)\n",
    "        subprocess.call(f'mkdir {ksplit_model_dir}', shell=True)\n",
    "        \n",
    "        # load model\n",
    "        model = BertForTokenClassification.from_pretrained(\n",
    "            \"/gladstone/theodoris/lab/ctheodoris/archive/geneformer_files/geneformer/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/\",\n",
    "            num_labels=2,\n",
    "            output_attentions = False,\n",
    "            output_hidden_states = False\n",
    "        )\n",
    "        if freeze_layers is not None:\n",
    "            modules_to_freeze = model.bert.encoder.layer[:freeze_layers]\n",
    "            for module in modules_to_freeze:\n",
    "                for param in module.parameters():\n",
    "                    param.requires_grad = False\n",
    "                \n",
    "        model = model.to(\"cuda:0\")\n",
    "        \n",
    "        # add output directory to training args and initiate\n",
    "        training_args[\"output_dir\"] = ksplit_output_dir\n",
    "        training_args_init = TrainingArguments(**training_args)\n",
    "        \n",
    "        # create the trainer\n",
    "        trainer = Trainer(\n",
    "            model=model,\n",
    "            args=training_args_init,\n",
    "            data_collator=DataCollatorForGeneClassification(),\n",
    "            train_dataset=trainset_labeled,\n",
    "            eval_dataset=evalset_train_labeled\n",
    "        )\n",
    "\n",
    "        # train the gene classifier\n",
    "        trainer.train()\n",
    "        \n",
    "        # save model\n",
    "        trainer.save_model(ksplit_model_dir)\n",
    "        \n",
    "        # evaluate model\n",
    "        fpr, tpr, interp_tpr, conf_mat = classifier_predict(trainer.model, evalset_oos_labeled, 200, mean_fpr)\n",
    "        \n",
    "        # append to tpr and roc lists\n",
    "        confusion = confusion + conf_mat\n",
    "        all_tpr.append(interp_tpr)\n",
    "        all_roc_auc.append(auc(fpr, tpr))\n",
    "        # append number of eval examples by which to weight tpr in averaged graphs\n",
    "        all_tpr_wt.append(len(tpr))\n",
    "        \n",
    "        iteration_num = iteration_num + 1\n",
    "        \n",
    "    # get overall metrics for cross-validation\n",
    "    mean_tpr, roc_auc, roc_auc_sd = get_cross_valid_metrics(all_tpr, all_roc_auc, all_tpr_wt)\n",
    "    return all_roc_auc, roc_auc, roc_auc_sd, mean_fpr, mean_tpr, confusion, label_dicts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define Functions for Plotting Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot ROC curve\n",
    "def plot_ROC(bundled_data, title):\n",
    "    plt.figure()\n",
    "    lw = 2\n",
    "    for roc_auc, roc_auc_sd, mean_fpr, mean_tpr, sample, color in bundled_data:\n",
    "        plt.plot(mean_fpr, mean_tpr, color=color,\n",
    "                 lw=lw, label=\"{0} (AUC {1:0.2f} $\\pm$ {2:0.2f})\".format(sample, roc_auc, roc_auc_sd))\n",
    "    plt.plot([0, 1], [0, 1], color='black', lw=lw, linestyle='--')\n",
    "    plt.xlim([0.0, 1.0])\n",
    "    plt.ylim([0.0, 1.05])\n",
    "    plt.xlabel('False Positive Rate')\n",
    "    plt.ylabel('True Positive Rate')\n",
    "    plt.title(title)\n",
    "    plt.legend(loc=\"lower right\")\n",
    "    plt.show()\n",
    "    \n",
    "# plot confusion matrix\n",
    "def plot_confusion_matrix(classes_list, conf_mat, title):\n",
    "    display_labels = []\n",
    "    i = 0\n",
    "    for label in classes_list:\n",
    "        display_labels += [\"{0}\\nn={1:.0f}\".format(label, sum(conf_mat[:,i]))]\n",
    "        i = i + 1\n",
    "    display = ConfusionMatrixDisplay(confusion_matrix=preprocessing.normalize(conf_mat, norm=\"l1\"), \n",
    "                                     display_labels=display_labels)\n",
    "    display.plot(cmap=\"Blues\",values_format=\".2g\")\n",
    "    plt.title(title)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fine-Tune With Gene Classification Learning Objective and Quantify Predictive Performance"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Please note that, as usual with deep learning models, we **highly** recommend tuning learning hyperparameters for all fine-tuning applications as this can significantly improve model performance. Example hyperparameters are defined below, but please see the \"hyperparam_optimiz_for_disease_classifier\" script for an example of how to tune hyperparameters for downstream applications."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# set model parameters\n",
    "# max input size\n",
    "max_input_size = 2 ** 11  # 2048\n",
    "\n",
    "# set training hyperparameters\n",
    "# max learning rate\n",
    "max_lr = 5e-5\n",
    "# how many pretrained layers to freeze\n",
    "freeze_layers = 4\n",
    "# number gpus\n",
    "num_gpus = 1\n",
    "# number cpu cores\n",
    "num_proc = 24\n",
    "# batch size for training and eval\n",
    "geneformer_batch_size = 12\n",
    "# learning schedule\n",
    "lr_schedule_fn = \"linear\"\n",
    "# warmup steps\n",
    "warmup_steps = 500\n",
    "# number of epochs\n",
    "epochs = 1\n",
    "# optimizer\n",
    "optimizer = \"adamw\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# set training arguments\n",
    "subsample_size = 10_000\n",
    "training_args = {\n",
    "    \"learning_rate\": max_lr,\n",
    "    \"do_train\": True,\n",
    "    \"evaluation_strategy\": \"no\",\n",
    "    \"save_strategy\": \"epoch\",\n",
    "    \"logging_steps\": 100,\n",
    "    \"group_by_length\": True,\n",
    "    \"length_column_name\": \"length\",\n",
    "    \"disable_tqdm\": False,\n",
    "    \"lr_scheduler_type\": lr_schedule_fn,\n",
    "    \"warmup_steps\": warmup_steps,\n",
    "    \"weight_decay\": 0.001,\n",
    "    \"per_device_train_batch_size\": geneformer_batch_size,\n",
    "    \"per_device_eval_batch_size\": geneformer_batch_size,\n",
    "    \"num_train_epochs\": epochs,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define output directory path\n",
    "current_date = datetime.datetime.now()\n",
    "datestamp = f\"{str(current_date.year)[-2:]}{current_date.month:02d}{current_date.day:02d}\"\n",
    "training_output_dir = f\"/path/to/models/{datestamp}_geneformer_GeneClassifier_dosageTF_L{max_input_size}_B{geneformer_batch_size}_LR{max_lr}_LS{lr_schedule_fn}_WU{warmup_steps}_E{epochs}_O{optimizer}_n{subsample_size}_F{freeze_layers}/\"\n",
    "\n",
    "# ensure not overwriting previously saved model\n",
    "ksplit_model_test = os.path.join(training_output_dir, \"ksplit0/models/pytorch_model.bin\")\n",
    "if os.path.isfile(ksplit_model_test) == True:\n",
    "    raise Exception(\"Model already saved to this directory.\")\n",
    "\n",
    "# make output directory\n",
    "subprocess.call(f'mkdir {training_output_dir}', shell=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3da0ae9f71de4f8b982948a2a9807dfd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-3224634f88c19116.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-5534ad8f3f0cf000.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-bfb98c01d951ae8d.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-29ac8ab551fb8961.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-03912be57f358581.arrow\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "****** Crossval split: 0/4 ******\n",
      "\n",
      "Filtering training data\n",
      "Filtered 36%; 31897 remain\n",
      "\n",
      "Filtering evalation data\n",
      "Filtered 49%; 25258 remain\n",
      "\n",
      "Labeling training data\n",
      "Labeling evaluation data\n",
      "Labeling evaluation OOS data\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']\n",
      "- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "Some weights of BertForTokenClassification were not initialized from the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
      "<ipython-input-15-21e1cede4c54>:45: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='834' max='834' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [834/834 01:33, Epoch 1/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>100</td>\n",
       "      <td>0.684000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>200</td>\n",
       "      <td>0.617600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>300</td>\n",
       "      <td>0.477400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>400</td>\n",
       "      <td>0.334300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>500</td>\n",
       "      <td>0.229500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>600</td>\n",
       "      <td>0.152700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>700</td>\n",
       "      <td>0.125600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>800</td>\n",
       "      <td>0.104900</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-4d8947ed4c65f4a4.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-8a83f628e23d5548.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-c6c437341faa1cfe.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-2010c177e27e09d1.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-15543d980ad3cbb0.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-a81a942ab15e4aa3.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-5d2c963673bb1115.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-6c7cc476a9d722c3.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-e274abd189113bba.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-1aedba9e0b982e5c.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-6668161997480231.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-d802b8093fb9c6f7.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-3ea48baa5fe880e2.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-86024b6184e99afe.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-7a47db2c9f9758a4.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-af1f6b8f743677db.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-67cffffa35fa22f7.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-81ed63bd02a44ee5.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-6e5a21d4d57e333d.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-eecde81c07e6d036.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-fcc19fab82bb7115.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-ea856d7fa4e78b24.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-698344adb3749f61.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-ee3f9e89abdbee4c.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-d98fd9d7fda61d3b.arrow\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEWCAYAAAB42tAoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAugElEQVR4nO3deVyU9d7/8dewCriAyqK4b4keS03NTqKJkiYiihp6yjzn1ruy9CzlLzXLtM2jZZ06WWqW3Voa921kmUumFWpuxy1cK0UEFyYXkE224fr9QVGkMIDODAPv5+PhQ2au71zXZ74Pvd5c2/drMgzDQEREpAwuji5ARESqNwWFiIiUS0EhIiLlUlCIiEi5FBQiIlIuBYWIiJRLQSEiIuVyc3QBItVZWFgYFy9exNXVFW9vb0JDQ3nmmWfw8fEBYP/+/fzrX//i0KFDuLi40LNnT6ZOnUq7du1K1pGVlcXrr7/Ol19+yZUrV2jcuDF33303kyZNomHDho76aiIVpiMKESsWLVrEgQMHWLNmDUePHmXJkiUAHDhwgAkTJjBgwAC2bdvGli1buOWWWxg7diwpKSkA5OfnM378eE6cOMHSpUvZt28fH330Eb6+vhw6dMiRX0ukwnREIVJB/v7+9OnTh2PHjgHw8ssvExUVxfjx40va/OMf/+DIkSP8+9//Zv78+Xz66aecP3+e5cuXlxyFNGrUiMcee8wh30GkKnREIVJBqampbNu2jRYtWnD16lUOHDjA4MGDr2l37733smPHDgB27NhBaGhoSUiIOCMdUYhY8ctv/zk5OfTu3Zu//vWvXLlyhaKiIvz9/a9p7+/vT1paGgDp6el07tzZrvWK3Gw6ohCxYuHChRw4cIAVK1aQmJhIWloa9evXx8XFhQsXLlzT/sKFC/j5+QHg6+t73TYizkRBIVJBvXr1Ijo6mnnz5uHt7U3Xrl3ZuHHjNe02bNhA7969AfjjH//I9u3bycnJsXe5IjeNgkKkEsaPH8+OHTs4duwYTzzxBGvWrGH58uVkZWVx5coVXnvtNQ4ePMjkyZMBiIqKIigoiClTpnDy5EmKiopIS0tj0aJFxMfHO/jbiFSMgkKkEho2bEhUVBRvvfUWPXr0YOnSpXz55ZeEhobSv39/jh07xsqVK2nVqhUAHh4evP/++7Rp04b/+q//4vbbb2f06NGkpaVx6623OvbLiFSQSRMXiYhIeXREISIi5VJQiIhIuRQUIiJSLgWFiIiUy+mezL7jjjsIDg52dBkiIk7l7Nmz7N69u0qfdbqgCA4OJi4uztFliIg4lejo6Cp/VqeeRESkXAoKEREpl4JCRETKpaAQEZFyKShERKRcCgoRESmXzYJixowZ3HnnnQwdOvS6yw3D4IUXXiA8PJzIyEiOHDliq1JEROQG2Ow5iujoaB544AGmTZt23eVbt24lKSmJTZs28d133zF79mz+7//+z1bliIidVWZg6sqMYV2Z4a4rVUOl1luJtpVYc3Udy9tmQdGzZ0/OnDlT5vItW7YwfPhwTCYTXbt2JSMjg59++omAgABblSRSJZYig7xCC/mFReQWFHEpO4/sPAvnr1wF4PSlHDzdXCgsMii0GBQWFVFYZJByOYd6ddzILzTILbSQcbWA/MKin9sV/52ZW0haTj4NvNyB0jsKwyjexRhG8c6m+O/ftvnte79pa/xm13S95b/5XH5hkc37T6qH7jfwWYc9mW02mwkKCip5HRQUhNlsvm5QxMbGEhsbC1Ayab3IjbqSU8CRc1dIyykg+XIOBgZ7k9K4mm8hNSOXzNwCLmblV3q9JhO4u7iAqXhHHOzrhae7C3XcXKlbx4067i64ebrh7mqimZ+J7DwLXh6u1PN0AxOYMJWsx1Tyt6n4bxPw88/FP/1uOWAy/fr5n1uXXpfp5y38/LkLmXkE+3lV/PtVsi8qvt6KN67ceivRtjLrrUxjW9VQiW/35Q9VKOZnDguK
6x0SltXxMTExxMTEADf2GLrUPldyCki8mMWPP2WxK/ESR89lcCbtKrkFFgqLrv036OvtjqebC12CG+Bfrw51PV3JyrPQzM8LL3dXPNxcqFfHjUY+nni6u+Dr5U59L3e8PYqXubm44Opimx2IyI348o2qf9ZhQREUFERqamrJ69TUVJ12kiozZ+RyITOPfafT2J+cxtm0q+w9fe3Rp5uLiXYBdWnd2IfOTevTMag+zRt64+3hSkMfD3w8nW74MxGbc9j/irCwMD744AMiIiL47rvvqFevnoJCKqSoyOC7M+lsPmbmyLkMvvn+wjVt/Ot5MrxrU3y9Peja3JcOgfVoF1AXDzfdES5SWTYLiscff5w9e/aQlpZG3759mTJlCoWFhQCMHTuWfv36ER8fT3h4OF5eXrz00ku2KkWcnGEYHDufydJtiWz98UKp6wYeri7cfYs/QfXrENren2Z+XnRqWh93VwWCyM1is6B49dVXy11uMpl49tlnbbV5cWJn0nL44oiZQ2fS+cGcxdHzGSXLWjT0ZmCIH73bNGRIlyY09a34RVgRqRqdkBWHK7AUcfRcBjtOXuLd7Ymljhj863nyQO8WNK7rydBbm9AuoJ4DKxWpnRQU4hAJZ9JZd+g8R85msPf0ZXILiu/n9/FwZXjXpgz+QxD9Owbg6ebq4EpFREEhdnPsfAYf7zvDyj3J5ORbAGgXUJe7OwTQo5Uff2zbmFuC6un2UpFqRkEhNpVXaOHtb06y5sBZki7lANC4rgd/bNuIpyM60aqxj4MrFBFrFBRy0xUVGWw7cZHNR818evAsGbnFd7uN692ScXe2pEOgrjOIOBMFhdywrLxCvjicSkpaDrsTL7Mz8VLJsr4d/Bl1ezPu6RRIHXddbxBxRgoKqbL0nHze2ZbIwq9PlrwX7OvFfT2a0aqxD+N6t6ReHXcHVigiN4OCQipt3+k0/t//fUfixWwAGvp48PeB7RnSpQmN63o6uDoRudkUFFJhKZdzmPg/e/nenAlA47qezI3uwoCOAbjoTiWRGktBIeXKLyxi+c4kFsUncjErD1cXE6HtG/Pi8C60aOTt6PJExA4UFHJdh89eYdWeZD777hyZuYV4uLowqHMgU++5hfa6a0mkVlFQSCmHzlzhT0t3kfnzLa1B9evwwvA/cE+nILw8dNeSSG2koBCg+BbXueuP8eHuZADa+vuweNzttPWva7OZvETEOSgohJTLOQxYEE++pYgR3YKZMaQjAfXqOLosEakmFBS1mGEYvPdtEs9/fhSAh/q24akhIQ6uSkSqGwVFLXUwJZ1Znx4m4cwVvNxdeefBHvRp39jRZYlINaSgqGXif7jAtNUJpGbk4mKC8Xe25MnBHTVXtIiUSXuHWmTZt6eYs/Yobi4m7r+jBY/1b6cZ4kTEKgVFLfDFkVQe+3A/hUUGnZvWZ+XE3jTw1hhMIlIxCooarNBSxCubfmBRfPGgfQH1PPnood4aqE9EKkVBUUN9ddzMox/uJ7egiMZ1Pfl8Sh+CGuiWVxGpPAVFDXMwJZ3XvvyB+B8uAPBsZCfG39lKg/aJSJUpKGqIoiKDlzd9z9vfFJ9murNNIxbcd5suVovIDVNQODnDMPjmhws8v/YoiRez6RhUjzf/1I12ARq4T0RuDgWFE7uYlcd9i3aWTCA0b2QX7uvRXGMzichNpaBwUoviT/LPDccBGNw5iFfuu426emhORGxAexYnk19YxJOrv2PNwXN0a+HLnGGdubWZr6PLEpEaTEHhRK5cLWDyyv1s+/EifTv4s2Tc7dRx1xwRImJbCgonsWLXaZ5ZcxiA/w5tzcyITg6uSERqCwVFNXc8NYNHP9xP4oXiC9bv/bkHYR0DHVyViNQmCopq7M2vfuSVTT8AMDAkkJdG/IGA+nq6WkTsS0FRTa3YmVQSEjtnhNGkgR6cExHHcLHlyrdu3cqgQYMIDw9nyZIl1yzPzMzkkUceYdiwYURERPDxxx/bshyn8f7Pw4E3
b+jFsecGKyRExKFsdkRhsVh47rnnWLZsGYGBgYwaNYqwsDDatWtX0ubDDz+kbdu2LFq0iMuXLzN48GAiIyPx8PCwVVnVmmEYzP7sCP+z8zQtG3nzwYQ78PLQXU0i4lg2C4qEhARatmxJ8+bNAYiIiGDLli2lgsJkMpGdnY1hGGRnZ9OgQQPc3Grn2bCUyzk8vGIfR89n0K+DP28/0B1vj9rZFyJSvdhsT2Q2mwkKCip5HRgYSEJCQqk2999/P5MmTSI0NJTs7Gxee+01XFyuPRsWGxtLbGwsAGlpabYq2WGOp2bwp3d2k56Tz5ODb2FSv7YahkNEqg2bBYVhGNe89/ud3/bt2wkJCWH58uUkJyfzl7/8hR49elC3bt1S7WJiYoiJiQEgOjraViU7xK7ES/z38r3kFRSx7C+96NfB39EliYiUYrOL2UFBQaSmppa8NpvNBAQElGoTFxfHPffcg8lkomXLljRr1ozExERblVTtfHnUzJgluzABsQ/3VkiISLVks6Do0qULSUlJpKSkkJ+fz7p16wgLCyvVpkmTJuzcuROAixcvcurUKZo1a2arkqqVfafTeHjFXvy83dn6ZH+6tfBzdEkiItdls1NPbm5uzJo1i4kTJ2KxWBg5ciTt27dn1apVAIwdO5ZHH32UGTNmEBkZiWEYTJ06lYYNG9qqpGpjydaTvLT+OCYTvPfnnvh61867vETEOZiM611MqMaio6OJi4tzdBlVFv/DBca/tweAjyfdye0ta34wiojj3ci+U/df2tEnB87wj9jv8HJ3Ze2UuzQLnYg4BQWFnSz8+gQvf/E9Dbzc+fSxu2jV2MfRJYmIVIiCwg7+k3SZl7/4npaNvFk7pQ/167g7uiQRkQqz6VhPArkFFp743+8AmD/yVoWEiDgdBYUNGYbB5JX7Sb6cw8whIdzRppGjSxIRqTQFhQ39v9UJbD72EwNDAvnvvm0cXY6ISJUoKGzklS++Z/W+M9T1dGPRA90dXY6ISJUpKGzgw92nefPrEwD8Z+ZA3FzVzSLivCq8B8vJybFlHTVGyuUcZn5ymGBfL3Y/NUDzSYiI07MaFPv372fIkCEMGTIEgOPHjzN79mxb1+W0nvrkEAAvjPgDgZrfWkRqAKtBMXfuXN599118fX0B6NixI3v37rV1XU7pX5t/YNuPFxl1ezP63xJg/QMiIk6gQqeemjRpUvpD15lcqLbb9uMF/rX5RwCeGhLi4GpERG4eq09mN2nShP3792MymcjPz2fFihW0bdvWHrU5jS+OpPLwin14uLqw/m+hNPTRaLAiUnNYPTSYPXs2H374IWazmX79+nHs2DGeffZZe9TmFA6fvcKkD/bhX8+TzY/3o11AXesfEhFxIlaPKE6dOsWCBQtKvbdv3z5uv/12mxXlLIqKDO5bvJMiA2If6k2LRt6OLklE5KazekTxwgsvVOi92mjp9kRy8i083K8Nbfx1JCEiNVOZRxQHDhzgwIEDXL58mWXLlpW8n5WVhcVisUtx1VlugYWX1h/H3dXEPwZ2cHQ5IiI2U2ZQFBQUkJOTg8ViITs7u+T9unXr8sYbb9iluOrstc0/ADBraCfquOuhOhGpucoMil69etGrVy9GjBhBcHCwPWuq9naevMTi+ES6tfDl/jtaOrocERGbsnox28vLi3nz5nHixAny8vJK3l++fLlNC6uuLEUGf489gMkEC//UHRcXk6NLEhGxKasXs6dOnUqbNm04c+YMkydPJjg4mC5dutijtmrpnW2JmDPymHBXa5r6ejm6HBERm7MaFOnp6YwePRo3Nzd69erF3Llz+e677+xRW7VzIDmNf244TuvGPky7t6OjyxERsQurp57c3IqbBAQE8M033xAQEEBqaqrNC6tuDMMomdJ0xYReuGvocBGpJawGxaRJk8jMzGTatGk8//zzZGdn89RTT9mjtmrlX5t/JPFiNhP6tKaZnx6sE5Haw2pQ9O/fH4B69eqxYsUKoPjJ7Nrk
ar6F17cUD/g3Q6ecRKSWKTMoLBYLGzZswGw2ExoaSocOHfj6669ZvHgxubm5rFmzxo5lOtai+JMATO7fTrPViUitU2ZQzJw5k/Pnz3PrrbfywgsvEBwczIEDB5g6dSoDBw60Z40OlZ1XyIpdpwH428D2Dq5GRMT+ygyKw4cP89lnn+Hi4kJeXh69e/dm06ZN+Pv727M+h9t0NJXL2fm8FnObLmCLSK1U5p7P3d29ZIIiT09PWrVqVetCAmDZt0nUq+NG1G16Ol1EaqcyjygSExOJjIwseZ2cnFzq9dq1a21bWTWwO/ESCWeuENOjuZ7AFpFaq8ygWL9+vT3rqJYWbCoe+O+JQRodVkRqrzKDorYPBFhoKWJP0mW6t/AloF4dR5cjIuIwNr06u3XrVgYNGkR4eDhLliy5bpvdu3cTFRVFREQEDzzwgC3LqZQPdycDcO8fmji4EhERx7L6wF1VWSwWnnvuOZYtW0ZgYCCjRo0iLCyMdu3albTJyMhgzpw5LF26lKZNm3Lp0iVblVMphmHw5tcnaOjjwfg/tnJ0OSIiDlWhI4rc3FwSExMrteKEhARatmxJ8+bN8fDwICIigi1btpRqs3btWsLDw2natCkAjRo1qtQ2bOWb7y9wITOPEd2C8XDTLbEiUrtZ3Qt+9dVXREVFMXHiRACOHTvGI488YnXFZrOZoKCgkteBgYGYzeZSbZKSksjIyGDcuHFER0dXm6e9V+0pPu30cN82Dq5ERMTxrJ56evPNN1m9ejXjxo0DICQkhLNnz1pdsWEY17xnMpW+xdRisXDkyBHef/99cnNzGTNmDLfddhutW7cu1S42NpbY2FgA0tLSrG77Ru04eYlgXy8C6usitoiI1aBwdXWlXr16lV5xUFBQqeHIzWYzAQEB17Tx8/PD29sbb29vevTowfHjx68JipiYGGJiYgCIjo6udC2V8VNGLll5hUR00UVsERGowKmn9u3bs3btWiwWC0lJSTz//PN069bN6oq7dOlCUlISKSkp5Ofns27dOsLCwkq1GTBgAHv37qWwsJCrV6+SkJBA27Ztq/5tboI5a48CMKJ77b49WETkF1aPKJ555hkWLVqEh4cHTzzxBH369OHRRx+1vmI3N2bNmsXEiROxWCyMHDmS9u3bs2rVKgDGjh1L27ZtCQ0NZdiwYbi4uDBq1Cg6dHDcw225BRbWHToPQO821ePCuoiIo5mM611M+I2jR4/SqVMne9VjVXR0NHFxcTZZ91vfnGD+xu95akhHHurr2CMbEZGb6Ub2nVaPKObOncuFCxcYPHgwERERtG9fM4faNgyD5TuKhxOf2Ed3O4mI/MJqUKxYsYILFy6wYcMGnnnmGbKzs7n33nsrdPrJmRxPzSQ1I5fH+rfVAIAiIr9RoafJ/P39efDBB5kzZw4dO3bkrbfesnVddnfipyxA1yZERH7P6hHFyZMnWb9+PV988QW+vr4MGTKE6dOn26M2u9r+40UAWjT0dnAlIiLVi9WgmDFjBhEREbz77rsEBgbaoyaH+PZkcVA091NQiIj8ltWg+N///V971OFQ569c5UzaVcb01ARFIiK/V2ZQ/O1vf+P1118vNavdb9WkGe6+PFo8BtUDvVs6uBIRkeqnzKCYOXMmAIsWLbJbMY7yfWom3h6udG5a39GliIhUO2Xe9fTLuEwrV64kODi41J+VK1farUB7OJt+lcZ1Pa8ZtFBERCpwe+yOHTuueW/r1q02KcYRruZb+Ob7C7QPqOvoUkREqqUyTz2tXLmSVatWkZKSUuo6RXZ2Nt27d7dLcfbw7Yniu506BzdwcCUiItVTmUERGRlJ3759efXVV3niiSdK3vfx8cHX19cetdnF+p8HAXzwTl3IFhG5njKDwmQy0axZM2bNmnXNsvT09BoTFtn5hbiYoHFdT0eXIiJSLZUZFE888QSLFy8mOjoak8lUasY6k8l0zfzXzup4aiY9WjV0dBkiItVWmUGxePFioHjO7Joq
O6+Q05dyGNQ5yHpjEZFayupdT/v27SMnJweATz/9lLlz53Lu3DmbF2YPCWeuABDSpPJTvYqI1BZWg2L27Nl4eXlx/Phxli5dStOmTXnyySftUZvN7fx5fKfWjXVrrIhIWawGhZubGyaTic2bN/Pggw8yfvx4srOz7VGbze1PTgegY5COKEREymI1KHx8fFi8eDGfffYZd999NxaLhcLCQnvUZnMHU9Lx8XCljruro0sREam2rAbFa6+9hoeHBy+99BL+/v6YzWYmTJhgj9psKi07n6y8Qrq39HN0KSIi1ZrVoPD39ycyMpLMzEy+/vprPD09GT58uB1Ks63dpy4BENGliYMrERGp3qwGxfr16xk9ejQbN25kw4YNJT87u0Nni+94Gta1qYMrERGp3qxOXLRo0SJWr15No0bFc0lfvnyZP//5zwwePNjmxdnSsfOZ+Hm74+1htQtERGo1q0cUhmGUhASAr69vqae0ndXhs1fw8VRIiIhYY3VP2adPHyZMmEBERARQfCqqb9++Ni/MloqKDH7KzKNbC19HlyIiUu1ZDYpp06axadMm9u3bh2EYxMTEEB4ebo/abCb5cvGT5l00tLiIiFVlBkVSUhLz5s0jJSWFDh06MG3aNAIDA+1Zm81sOpoKwPBuwQ6uRESk+ivzGsVTTz1F//79eeONN+jcuTPPP/+8PeuyqZW7kwHo1tzXsYWIiDiBMo8osrOzue+++wBo06YNI0aMsFtRtpZ0KYdGPh6aI1tEpALKDIq8vDyOHj1acodTbm5uqdedO3e2T4U3WXpOPgB3tNEcFCIiFVFmUPj7+zN37tyS140bNy55bTKZWL58ue2rs4GP/pMCwICONeN6i4iIrZUZFCtWrLBnHXazak/x9YmIWzV0h4hIRVh94K6mSb2SS+O6nhoxVkSkgmwaFFu3bmXQoEGEh4ezZMmSMtslJCQQEhJi8zGkDMMgr7BI1ydERCrBZkFhsVh47rnnWLp0KevWrePzzz/nxIkT1233yiuv0KdPH1uVUmL3qcuAHrQTEamMCo319Omnn/Lmm28CcO7cORISEqyuOCEhgZYtW9K8eXM8PDyIiIhgy5Yt17RbsWIFgwYNKjWelK2s/a54ru+BIQE235aISE1RoTmzDx48yLp164DiGe/mzJljdcVms5mgoKCS14GBgZjN5mvabN68mTFjxpS7rtjYWKKjo4mOjiYtLc3qtsvyozkLgHYBmvpURKSirAZFQkICzz77LJ6engA0aNCAgoICqyu+3gizv3/A7cUXX2Tq1Km4upZ/YTkmJoa4uDji4uLw86v6jHR7ki7TprFPlT8vIlIbWR0U0M3NDYvFUrKTv3z5Mi4u1i9tBAUFkZqaWvLabDYTEFD6lM/hw4d5/PHHAUhLSyM+Ph43NzcGDhxYqS9REd+nZgLQo5WmPhURqQyrQTFu3Dgee+wxLl26xGuvvcbGjRv5+9//bnXFXbp0ISkpiZSUFAIDA1m3bh0LFiwo1earr74q+Xn69OncfffdNgkJgDUHzwIwsnszm6xfRKSmshoUw4YNo3PnzuzatQvDMHjrrbdo27at9RW7uTFr1iwmTpyIxWJh5MiRtG/fnlWrVgEwduzYG6++ElbvOwNAr9a6NVZEpDKsBsW5c+fw8vKif//+pd5r2tT6XNP9+vWjX79+pd4rKyD++c9/Wl3fjfBwdSGwvqcGAhQRqSSrQfHwww+X/JyXl8eZM2do3bp1yV1QzuJs+lXG9mrh6DJERJyO1aBYu3ZtqddHjhwhNjbWZgXZwi8jxnq46mhCRKSyKv1kdufOnTl06JAtarGZfaeLn71oF1DXwZWIiDgfq0cUy5YtK/m5qKiIo0eP0rChc10QzsorBOA2zWgnIlJpVoMiOzu75GdXV1f69evHoEGDbFrUzZaZWxwUDX08HFyJiIjzKTcoLBYL2dnZTJs2zV712MTBlHQAGtf1dGwhIiJOqMxrFIWFhbi6unL06FF71mMT
Jy8Uj/GkOShERCqvzCOK0aNH88knnxASEsIjjzzC4MGD8fb2Lll+zz332KXAm+GEOYsWDb2tNxQRkWtYvUZx5coV/Pz82L17d6n3nSUosvIKycwrpO8t/o4uRUTEKZUZFJcuXWLZsmW0b98ek8lUajRYZ3q6+fj5DAD+2Nb2812IiNREZQZFUVFRqTuenFX8DxcA6NSkvoMrERFxTmUGhb+/P5MnT7ZnLTbxU0YeAN1aaHhxEZGqKPOup+tNPOSMTl3MplUjXcgWEamqMoPi/ffft2MZtnMpOw8XJ7qmIiJS3ZQZFL6+vnYswzZyCywkX86hbwfd8SQiUlWVHhTQmew4eZECi8GduuNJRKTKanRQrD9UPGd37zYKChGRqqrRQXEgOQ1XFxMNvNwdXYqIiNOq0UFx5Wohzf28HF2GiIhTq7FBUWAp4mJWHu0C6jm6FBERp1Zjg+LTg+cAiOra1MGViIg4txobFBsPnwcgoksTB1ciIuLcamxQ7Dh5CQ83F1xc9LCdiMiNsDrMuLPy8XTDw7XG5qCIiN3UyD1pek4+FzLzGNEt2NGliIg4vRoZFLtPXQbgD8EaWlxE5EbVyKBYve8MAL1a64lsEZEbVSOD4mJW8RwUDX08HFyJiIjzq3FBYRgGB5LTCdGMdiIiN0WNC4qDKekA3BrcwLGFiIjUEDUuKH65PjExtLWDKxERqRlqXFAcPZ8BQLuAug6uRESkZrBpUGzdupVBgwYRHh7OkiVLrln+2WefERkZSWRkJGPGjOH48eM3tL1CSxEHktMJbd8Yk6Y/FRG5KWwWFBaLheeee46lS5eybt06Pv/8c06cOFGqTbNmzfjggw9Yu3YtkyZN4plnnrmhbe5KLH5+IrR94xtaj4iI/MpmQZGQkEDLli1p3rw5Hh4eREREsGXLllJtunfvToMGxRedu3btSmpq6g1t87sz6QCEttcc2SIiN4vNxnoym80EBQWVvA4MDCQhIaHM9qtXr6Zv377XXRYbG0tsbCwAaWlpZa4jO68QgNaNfapSsoiIXIfNgsIwjGveK+u6wa5du1i9ejUrV6687vKYmBhiYmIAiI6OLnObqRm5+Hq7U8fdtQoVi4jI9dgsKIKCgkqdSjKbzQQEBFzT7vjx4zz99NO88847+Pn53dA2951Oo5mmPhURualsdo2iS5cuJCUlkZKSQn5+PuvWrSMsLKxUm3PnzjFlyhTmz59P69Y39tyDpcjgclY+3u41duR0ERGHsNle1c3NjVmzZjFx4kQsFgsjR46kffv2rFq1CoCxY8eycOFC0tPTmTNnDgCurq7ExcVVaXvHzmeQmVdIpKY+FRG5qWz663e/fv3o169fqffGjh1b8vOLL77Iiy++eFO29eVRMwBhHa89vSUiIlVXI57MLioy+PdXP/KH4Po0bVDH0eWIiNQoNSIofvwpiyIDwm4J0BPZIiI3WY0Iim0/XgDgbp12EhG56WpEUFzMygegi4YWFxG56WpEUKReuUoDL3fcXWvE1xERqVacfs9qGAabj/2kYTtERGzE6YPi/JVcsvIK6d7ixp7qFhGR63P6oDibfhWA25rr+oSIiC04fVCs3J0MwJ1tGjm4EhGRmsnpg2LHyYt4e7gSUF8P2omI2IJTB0VugQVzRh69dTQhImIzTh0Um48Vj+80IEQP2omI2IpTB8XB5HQA7ukUVH5DERGpMqcOikNnrwDgX8/TwZWIiNRcThsUuQUWdp+6zACN7yQiYlNOGxRfHCmeZvXeLk0cXImISM3mtEGx8XBxUAzpousTIiK25LRBceRcBm0a++DtoTmyRURsySmDoqjI4ExaDn9sp+cnRERszSmDYlfiJYoMuLWZr6NLERGp8ZwyKOJ/ntHurnaNHVyJiEjN55RBkXghG4BgXy8HVyIiUvM5ZVCcvpSNj4ero8sQEakVnDIoLmTmEajRYkVE7MIpgyItp0BTn4qI2InTBUVhkQFofCcREXtxuqDIzC0E4O5b
NMaTiIg9OF1Q5BZYALijdUMHVyIiUjs4XVDkFxYB4Ofj4eBKRERqB6cLiqy8QiI0YqyIiN04XVAUGQa3t/RzdBkiIrWG0wUFoGcoRETsyCmDQs9QiIjYj02DYuvWrQwaNIjw8HCWLFlyzXLDMHjhhRcIDw8nMjKSI0eOVGi9LRp53+xSRUSkDDYLCovFwnPPPcfSpUtZt24dn3/+OSdOnCjVZuvWrSQlJbFp0yaef/55Zs+eXaF1a5wnERH7sVlQJCQk0LJlS5o3b46HhwcRERFs2bKlVJstW7YwfPhwTCYTXbt2JSMjg59++qn8gk0mTCaTrcoWEZHfsdk8omazmaCgX+ezDgwMJCEhodw2QUFBmM1mAgJKP3UdGxtLbGwsAO5ZqURHR9uqbKeSlpaGn5/uAAP1xW+pL36lvvjVqVOnqvxZmwWFYRjXvPf7I4GKtAGIiYkhJiYGgOjoaOLi4m5Slc5NffEr9cWv1Be/Ul/86kZ+wbbZqaegoCBSU1NLXl/vSOH3bVJTU69pIyIijmWzoOjSpQtJSUmkpKSQn5/PunXrCAsLK9UmLCyMNWvWYBgGBw8epF69egoKEZFqxmanntzc3Jg1axYTJ07EYrEwcuRI2rdvz6pVqwAYO3Ys/fr1Iz4+nvDwcLy8vHjppZesrveXU1Civvgt9cWv1Be/Ul/86kb6wmRc70KBiIjIz5zyyWwREbEfBYWIiJSr2gaFrYb/cEbW+uKzzz4jMjKSyMhIxowZw/Hjxx1QpX1Y64tfJCQkEBISwsaNG+1YnX1VpC92795NVFQUERERPPDAA3au0H6s9UVmZiaPPPIIw4YNIyIigo8//tgBVdrejBkzuPPOOxk6dOh1l1d5v2lUQ4WFhcaAAQOM5ORkIy8vz4iMjDR+/PHHUm2++eYbY8KECUZRUZFx4MABY9SoUQ6q1rYq0hf79u0z0tPTDcMo7pfa3Be/tBs3bpwxceJEY8OGDQ6o1PYq0hdXrlwx7r33XuPs2bOGYRjGxYsXHVGqzVWkL95++21j/vz5hmEYxqVLl4yePXsaeXl5jijXpvbs2WMcPnzYiIiIuO7yqu43q+URha2G/3BGFemL7t2706BBAwC6du1a6tmUmqQifQGwYsUKBg0aRKNGjRxQpX1UpC/Wrl1LeHg4TZs2Baix/VGRvjCZTGRnZ2MYBtnZ2TRo0AA3N5vd9OkwPXv2LNkXXE9V95vVMiiuN/yH2Wwut80vw3/UNBXpi99avXo1ffv2tUdpdlfRfxebN29mzJgx9i7PrirSF0lJSWRkZDBu3Diio6NZs2aNnau0j4r0xf3338/JkycJDQ1l2LBhzJw5ExeXarn7s6mq7jerZaQaN3H4D2dXme+5a9cuVq9ezcqVK21dlkNUpC9efPFFpk6diqtrzR5huCJ9YbFYOHLkCO+//z65ubmMGTOG2267jdatW9urTLuoSF9s376dkJAQli9fTnJyMn/5y1/o0aMHdevWtVeZ1UJV95vVMig0/MevKtIXAMePH+fpp5/mnXfeqbGDoFWkLw4fPszjjz8OFA8IFx8fj5ubGwMHDrRrrbZW0f8jfn5+eHt74+3tTY8ePTh+/HiNC4qK9EVcXBwPPfQQJpOJli1b0qxZMxITE7n11lvtXa5DVXW/WS2PvTT8x68q0hfnzp1jypQpzJ8/v8btBH6rIn3x1VdflfwZNGgQzz77bI0LCahYXwwYMIC9e/dSWFjI1atXSUhIoG3btg6q2HYq0hdNmjRh586dAFy8eJFTp07RrFkzR5TrUFXdb1bLIwpbDf/hjCrSFwsXLiQ9PZ05c+YA4OrqWiNHzKxIX9QWFemLtm3blpyTd3FxYdSoUXTo0MHBld98FemLRx99lBkzZhAZGYlhGEydOpWGDRs6uPKb7/HHH2fPnj2kpaXRt29fpkyZQmFhIXBj+00N4SEiIuWqlqeeRESk+lBQiIhIuRQUIiJSLgWFiIiUS0EhIiLlUlBItRQS
EkJUVFTJnzNnzpTZtlu3bje8venTpxMWFkZUVBQjRozgwIEDlV7HzJkzOXHiBACLFi0qtexmDSnyS78MHTqURx55hIyMjHLbHzt2jPj4+Juybam9dHusVEvdunWr8M66Mm3LMn36dO6++24GDx7M9u3bmTdvHmvXrq3y+m5GTdbWO23aNFq1asWkSZPKbB8XF8fhw4eZNWvWTa9Fag8dUYhTyM7OZvz48YwYMYLIyEg2b958TZuffvqJ+++/v+Q37r179wLF4/zExMQwYsQI/vrXv5KdnV3utnr27ElycjIAy5YtY+jQoQwdOpT3338fgJycHB566CGGDRvG0KFDWb9+PQDjxo3j0KFDvPLKK+Tm5hIVFcUTTzwB/HrU8/e//73Ub/jTp0/niy++wGKxMG/ePEaOHElkZCQfffSR1T7p2rVryYBuCQkJjBkzhuHDhzNmzBgSExPJz8/njTfeYP369URFRbF+/XpycnKYMWMGI0eOZPjw4dftR5Fr3NDg5yI20rFjR2PYsGHGsGHDjEcffdQoKCgwMjMzDcMonk9g4MCBRlFRkWEYhtG1a1fDMAzj3XffNd566y3DMIrnKMjMzDQuXbpk/OlPfzKys7MNwzCMxYsXG//+97+v2d60adNK5q5Yv369MWrUKOPQoUPG0KFDjezsbCMrK8sYMmSIceTIEWPjxo3GzJkzSz6bkZFhGIZhPPDAA0ZCQkKpmn7xy+tNmzYZTz75pGEYhpGXl2f07dvXuHr1qvHRRx8ZCxcuLHl/xIgRRnJy8jV1/rKewsJCY8qUKUZ8fLxhGIaRmZlpFBQUGIZhGN9++60xefJkwzAM4+OPPzbmzJlT8vkFCxYYa9asMQyjeL6Ke+65p6RvRMpSLYfwEKlTpw6ffvppyeuCggJeffVV/vOf/+Di4oLZbObixYv4+/uXtOnSpQtPPfUUhYWFDBw4kJCQEL7++mtOnDhRMrxHQUEBXbt2ve4258+fz9tvv03Dhg158cUX2blzJwMHDsTb2xuA8PBw9u7dS2hoKPPmzePll1+mf//+9OjRo8Lfq2/fvrzwwgvk5+ezdetWevToQZ06dfj222/5/vvv+eKLL4DiGdlOnz5N8+bNS33+lyOVs2fP0rlzZ+66666S9tOmTeP06dOYTCYKCgquu/3t27fz1Vdf8d577wGQl5fH+fPna+QYUHLzKCjEKaxdu5bLly8TFxeHu7s7YWFh5OXllWrTs2dPPvjgA+Lj43nyySeZMGEC9evX56677uLVV1+1uo0nn3ySwYMHl7zesWPHddu1bt2auLg44uPjWbBgAXfddReTJ0+u0Pfw9PSkV69ebNu2jQ0bNhAREQEUD//89NNPExoaWu7nfwnQzMxMHn74YT788EMefPBBXn/9de644w4WLlzImTNnePDBB8tcxxtvvEGbNm0qVK8I6BqFOInMzEwaNWqEu7s7u3bt4uzZs9e0OXv2LI0aNeK+++5j5MiRHDlyhK5du7J//35Onz4NwNWrVzl16lSFttmzZ082b97M1atXycnJYfPmzfTo0QOz2YyXlxdRUVFMmDCBo0ePXvNZNze3Mn+rj4iIIC4ujr1799KnTx8A+vTpw6pVq0o+c+rUKXJycsqsrV69ejz99NO89957FBQUkJmZSWBgIACffPJJSTsfH59S12T69OnDBx98UDIvwfVqF/k9HVGIU4iMjGTSpElER0cTEhJy3d+I9+zZw7vvvoubmxve3t7MmzePhg0bMnfuXB5//HHy8/OB4gvKFRmOvXPnzkRHRzN69GgARo0aRadOndi2bRvz58/HxcUFNzc3Zs+efc1n77vvPoYNG0anTp1YsGBBqWV33XUX06ZNIywsDA8PDwBGjx7N2bNniY6OxjAM/Pz8eOutt8qtr1OnTnTs2JF169YxceJEpk+fzrJly+jdu3dJmzvuuIMlS5YQFRXFww8/zKOPPspLL73EsGHDMAyD4OBgFi9ebLUvpHbT7bEi
IlIunXoSEZFyKShERKRcCgoRESmXgkJERMqloBARkXIpKEREpFwKChERKdf/BySMCvqgAUcIAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-1cc2a7963b74376c.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-25d39eb14def0850.arrow\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "****** Crossval split: 1/4 ******\n",
      "\n",
      "Filtering training data\n",
      "Filtered 35%; 32406 remain\n",
      "\n",
      "Filtering evalation data\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-407cdf2a13a57414.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-5b5ee37df8a97b60.arrow\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Filtered 52%; 23996 remain\n",
      "\n",
      "Labeling training data\n",
      "Labeling evaluation data\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-26e9dc90c3620d42.arrow\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Labeling evaluation OOS data\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']\n",
      "- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "Some weights of BertForTokenClassification were not initialized from the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
      "<ipython-input-15-21e1cede4c54>:45: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='834' max='834' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [834/834 01:33, Epoch 1/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>100</td>\n",
       "      <td>0.658900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>200</td>\n",
       "      <td>0.585400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>300</td>\n",
       "      <td>0.474600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>400</td>\n",
       "      <td>0.346600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>500</td>\n",
       "      <td>0.257400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>600</td>\n",
       "      <td>0.185800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>700</td>\n",
       "      <td>0.134200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>800</td>\n",
       "      <td>0.114500</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-cbfcb02a16dd9d81.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-b151d664d8c68613.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-52266cf801a76344.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-5c7ceff44bad692c.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-81bcbb23e61bfc0c.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-e99a8c7eedd34769.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-6d7d5150907035d9.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-735b525b0abf0f74.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-9a47cf8290cd2f6b.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-56deb15eec02ca33.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-2aea162267b33f73.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-3bc7a169c841323d.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-1f67206928846c7a.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-88375062775280fb.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-bb45ebd2db699b53.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-fd6e4344cc2f8033.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-b8a9338cde5e5801.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-c013876f43a71ad7.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-148c328cb89da5c3.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-488b3d116a6d3b19.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-835e3e1538e24397.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-d176e8ab14f1ce28.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-3451fb13f869a5b0.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-56f270f895acc3ff.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-db497551e7a1e808.arrow\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEWCAYAAAB42tAoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAsFUlEQVR4nO3deXxM5x4/8M8sEmKLRBYStURUuIg2lpZYQgQRYUJDW3VbfpTS24trqVpr+dGqWy1FKT+U5l5NLQ2qtqC2IhVC2kaESGSQPZN98vz+COOmySyJzJkMn/fr1dfNzHnmnO88tz2fec7yHJkQQoCIiEgPuaULICKimo1BQUREBjEoiIjIIAYFEREZxKAgIiKDGBRERGQQg4KIiAxSWroAoprMz88PDx8+hEKhgJ2dHXx9fTFv3jzUrVsXAHD58mX8+9//xtWrVyGXy9GlSxfMmDEDrVu31q0jJycHn3/+OX7++WdkZmaicePG6NOnDyZNmgQHBwdLfTUik3FEQWTE+vXrERUVhT179uD69evYuHEjACAqKgrjxo1Dv379cOrUKRw9ehQvvvgiRo8ejcTERABAYWEhxo4di7i4OGzatAmXLl3Cd999B3t7e1y9etWSX4vIZBxREJnIyckJPXv2xI0bNwAAn3zyCYKDgzF27Fhdm3/+85+IiYnBF198gZUrV2Lv3r24d+8etm3bphuFODo64r333rPIdyCqCo4oiEyUkpKCU6dO4YUXXkBeXh6ioqIwcODAcu0GDRqEM2fOAADOnDkDX19fXUgQWSOOKIiMePzrPzc3F927d8f777+PzMxMlJSUwMnJqVx7JycnpKenAwAyMjLQvn17Seslqm4cURAZsXbtWkRFRWH79u2Ij49Heno6GjRoALlcjgcPHpRr/+DBAzRq1AgAYG9vX2EbImvCoCAyUdeuXaFSqbBixQrY2dnB29sbhw4dKtfu4MGD6N69OwDg1VdfxenTp5Gbmyt1uUTVhkFBVAljx47FmTNncOPGDUyfPh179uzBtm3bkJOTg8zMTKxevRq//fYbpkyZAgAIDg6Gq6srpk6dips3b6KkpATp6elYv349IiMjLfxtiEzDoCCqBAcHBwQHB2PdunXw8fHBpk2b8PPPP8PX1xd9+/bFjRs3sHPnTrRo0QIAYGNjg61bt6JVq1Z455138PLLL2PkyJFIT09Hx44dLftliEwk44OLiIjIEI4oiIjIIAYFEREZxKAgIiKDGBRERGSQ1d2Z3a1bN7i5uVm6DCIiq5KUlITz589X6bNWFxRubm4IDw+3dBlERFZFpVJV+bM89ERERAYxKIiIyCAGBRERGcSgICIigxgURERkEIOCiIgMMltQzJkzB6+88gqGDBlS4XIhBJYsWQJ/f38EBQUhJibGXKUQEdFTMNt9FCqVCm+++SZmzZpV4fKTJ08iISEBhw8fxpUrV7Bw4UL897//NVc5REQVMjSBtr5F+j5hcF2V3kbl6zIXswVFly5dcPfuXb3Ljx49imHDhkEmk8Hb2xtZWVm4f/8+nJ2dzVUS0VMRQqBQW4KC4hJk5xcjO78IhcUlKCwuwcOcAshkMhQUlyAlMw/yR3/feqhBPVslCh61i3+YA8e6NhACKBECJY/+F3j0uqT0f4Uo3VH8npINlwa1dcsFSncS4tFnBUo/87g+3XviSTvx6LMZuUWopZBBIZdV8N30fGe9nWGgn/QsNLRzq+yO19B+kg9OqNhLT/FZi92ZrVar4erqqnvt6uoKtVpdYVCEhYUhLCwMAHQPrSeqKvFop5mZV4QHOQUo1grkF2txLyMfchlwLzMfD3IKcC8jD2m5RbidqoFCJkOqprDK22xczxa2SjlqKWT4U52DFo3tIJfJIJPJIAMglwFymezRe4BcDshlcrRr2gDZ+cVo7mgHGUqXyWQyyGV49LnSP2R49N6j9Txu97iN7FE2pGoK4W5fp+Iiy+fHo7crXiDT
097Aqox8pnLbMbAqvR+ydF16P2OogMquS08FP/9R6U3oWCwoKvqloK+zQkNDERoaCuDpbkOn50N+kRbJGXm4mpSJqDsZiE3JQlJGHlIy86GQl/7SN/ars76tEg3taqGguARerg2gkMvQsnFd1LFRwLm+LerYKNCgdi3UqaWAjVIOhVwGOxsF6tdWwlapQB0bBerZKmGrlFdpJ0BU3X5eU/XPWiwoXF1dkZKSonudkpLCw05UKUII3E3Pw5/3s/Fj9D3cfKDBlcSMcu1slXI41beFT3MHNKxTC21c6qF+7VpwrGcDmQxwrl8bdWwUsFHIYW9XCw3q1EKD2rWk/0JENZTFgsLPzw87duxAYGAgrly5gvr16zMoyKAriRn4f2cTEHc/BwAQfTezzHKn+rYY3MEVNgo5Orrbo4N7Q7Rr0gB1ba1u7kuiGsVs/wVNmzYNFy5cQHp6Onr16oWpU6eiuLgYADB69Gj07t0bkZGR8Pf3R506dbBs2TJzlUJWKDEtFyf+eIATsfcR/1CDWw81umU2SjlecLDDiJfd4dLAFt7NGqHzC/ZoXM/WghUTPbvMFhSfffaZweUymQwLFiww1+bJiuQXafFrQhrOx6fhXmY+zt9Kxd30PN3ypg1ro0uLRvBq0gATe3vATd/JWCIyC47JySIeZBfgs59/x+8p2bh8J6PMMl/PxlB1dkNHd3v0ftEJtRScQIDIkhgUJJmHOQX44XISfohKwvV7Wbr3B/3NFZ2a2WNAOxe0bFyXVwkR1TAMCjKLwuISHL2hxo2UbJz4/X6ZE89u9nUwvLMb/Nu5YHCHJhaskohMwaCgaiOEwIGrKfghKgnn4lORU1B68YJLA1t0amaPdk0aYEjHJujRurGFKyWiymBQ0FPRlggcjknB3t+ScSjmyX0xr3o4YuyrLdC7jRNq11JYsEIieloMCqq0gmItDly9hzNxqdh7JRmFxaWTDfk/Oscwxa81b1gjeoYwKMhk+68kY9OpeFz5y/mGkJfd8U6PFrC3s7FgdURkLgwK0qtYW4KtZxJw+U46jsc+QF6RFgAwoJ0LBrR3hb+XCxraceRA9KxjUFA515Oz8M0vt7D70pNp4tu41MOrHo0xc+CLsLPhvzZEzxP+F0/IK9QiKjEd4ZeT8PN1NTLzigAAHd0bQtXZDWNeaVHhMwyI6PnAoHhO/aHOxv4ryYi4eg/xDzRllo3r2RJvdHsBrZzqWag6IqpJGBTPESEELtxKw/rImzj++wMAwAsOdgj2booeHo3h5+XMifWIqBwGxXMi6k46Xv/6vO6EdNcWDpgb6IVOzewtWxgR1XgMimdcckYevjweh53n78BWKceY7s3xT/82cKjLS1mJyDQMimeQEAKX72Rg/t5riEkunXzv5eaN8O9QbzRzsLNwdURkbRgUzxBNQTE2n76Fr07cRF6RFrZKOfq1dcbUfp7o5N6Qs7ISUZUwKJ4RB67ew+RvLwMA5DJA9ZIb5g9px7ulieipMSis2IPsApyNT8XO87dxLj4NAPDF6M4Y0rEJRw9EVG0YFFbozM2H+L8HY3XPeLBVyjGqSzPMCHiRl7cSUbVjUFiRA1fvYd6ea0jVFAIAVJ3dMKC9K3w9G6OuLf+vJCLz4N7FCuQUFGNpxHXsupAIOxsFurV0wNLhHdDamXdOE5H5MShqMCEEvr+chBn/vQIAaOtaH3ve68EHARGRpBgUNVCaphD7ryRjwb4YAEB9WyUm9m6FSX1ac3I+IpIcg6IG0ZYIfHUiDp8e/gNA6UnqcT1b4v1+nhxFEJHFMChqgPwiLb48FoevT8WjoLgEnV+wxwTfVujb1pkBQUQWx6CwICEE9l1Jxr/+G41Cbelzp/+Pb0vMGeQFOQ8xEVENwaCwgIJiLZb8eAPbz93WvbdseAeM9HFHLYXcgpUREZXHoJDYqT8f4MMfriIxLQ9dWzogoL0rRrzkzmdPE1GNxaCQSEmJwMzvo3XPoV4Z0hGvdWlm4aqIiIxj
UJhZfpEW607cxPoTN1GoLYFjXRt88XpnvOrR2NKlERGZhEFhRnuikvBB2G8AgM4v2OPvr7ZAsLebZYsiIqokBoUZnP7zIebtvYZbDzVo4WiH6QNeRFCnppYui4ioSswaFCdPnsTSpUtRUlKCkSNHYsKECWWWZ2dn41//+heSk5Oh1WrxzjvvICQkxJwlmVVqTgFe//o8fldnAwAa17PFvqk90aA2T1QTkfUyW1BotVosXrwYW7ZsgYuLC0aMGAE/Pz+0bt1a1+bbb7+Fh4cH1q9fj7S0NAwcOBBBQUGwsbG+h+2cj0/FpG8vI01TiAHtXDBvSDs+dpSInglmC4ro6Gg0b94czZqVXtkTGBiIo0ePlgkKmUwGjUYDIQQ0Gg0aNmwIpdK6joYJIbDi0O9YH3kTALBxzMsY0N7VwlUREVUfs+2V1Wo1XF2f7DBdXFwQHR1dps0bb7yBSZMmwdfXFxqNBqtXr4ZcXv6Gs7CwMISFhQEA0tPTzVVypQghcOhaCpYdvIHEtDz0aO2IfwW0hXcze0uXRkRUrcwWFEKIcu/99fGcp0+fhpeXF7Zt24Y7d+7g7bffho+PD+rVK/uchdDQUISGhgIAVCqVuUo22f3sfIzeeA43H2gAABN7tcLsQW35+FEieiaZLShcXV2RkpKie61Wq+Hs7FymTXh4OCZMmACZTIbmzZvD3d0d8fHx6Nixo7nKemo3H+Sg36pIAIB/Oxeseq0TT1YT0TPNbBMLdejQAQkJCUhMTERhYSEiIiLg5+dXpk2TJk1w9uxZAMDDhw9x69YtuLu7m6ukp7bj3G0M+vcpAMDKER3x9Vs+DAkieuaZbUShVCoxf/58jB8/HlqtFiEhIfD09MSuXbsAAKNHj8bkyZMxZ84cBAUFQQiBGTNmwMHBwVwlPZUtv9zCov3XUddGgf++2wOdeC6CiJ4TMlHRyYQaTKVSITw8XNJtXr2biaAvT8NWKceZ2X5wrGcr6faJiJ7W0+w7Oae1EYlpuZj07SUAwJ73ejAkiOi5Y103LUgsO78IfT49AW2JwMqQjvBq0sDSJRERSY4jCj0KirUYuf4stCUCy4Z34JTgRPTc4oiiApl5RXhl+VHkFmoxsVcrvN7tBUuXRERkMQyKv9AUFGPk+jPILdRiUh8PzAx40dIlERFZFIPifzzMKcCYzRfwhzoH7/X1wL8C2lq6JCIii2NQPJKZV4Rha3/B3fQ8LAhqh7d7tLR0SURENQKDAqUnroc/Cgk+y5qIqCyTr3rKzc01Zx0Wk1+khWrdGcQ/1GDGgDYMCSKivzAaFJcvX8bgwYMxePBgAEBsbCwWLlxo7roks3BfDGKSs/BeXw9M8fO0dDlERDWO0aBYvnw5Nm/eDHt7ewBA27ZtcfHiRXPXJYnjsffx3a+JAMAT10REeph06KlJkyZlP1TBw4Wszc0HOXh7668AgO8nvWrhaoiIai6jJ7ObNGmCy5cvQyaTobCwENu3b4eHh4cUtZnVxO2l8zeFTeiOl5s3snA1REQ1l9GhwcKFC/Htt99CrVajd+/euHHjBhYsWCBFbWZz8o8HiLufg35tndGtlaOlyyEiqtGMjihu3bqFVatWlXnv0qVLePnll81WlDlpCorxzqNDTrMH8bwEEZExRkcUS5YsMek9azFzdzSKSwSm9G0NT5f6li6HiKjG0zuiiIqKQlRUFNLS0rBlyxbd+zk5OdBqtZIUV93WHo9DxNV7GPQ3V8zgHE5ERCbRGxRFRUXIzc2FVquFRqPRvV+vXj2sWbNGkuKq0930XHzy0+9oWKcW/j3K29LlEBFZDb1B0bVrV3Tt2hXDhw+Hm5ublDWZxfy9MQCA1aGdYKtUWLgaIiLrYfRkdp06dbBixQrExcWhoKBA9/62bdvMWlh1upuei2Ox99GuSQP4tXWxdDlERFbF6MnsGTNmoFWrVrh79y6mTJkCNzc3dOjQQYraqs1/Ht19/U//NhauhIjI+hgNioyMDIwcORJK
pRJdu3bF8uXLceXKFSlqqxYlJQIbT8VDIZehX1tnS5dDRGR1jB56UipLmzg7O+PEiRNwdnZGSkqK2QurLmuO/Yn8ohIM7+wGuVxm6XKIiKyO0aCYNGkSsrOzMWvWLHz88cfQaDT48MMPpajtqeUVavHFsTgAwMKg9hauhojIOhkNir59+wIA6tevj+3btwMovTPbGnz36x1oSwRWh3ZCQ7tali6HiMgq6Q0KrVaLgwcPQq1Ww9fXF23atMHx48exYcMG5OfnY8+ePRKWWXlCCKw89DsAYNDfmhhpTURE+ugNirlz5+LevXvo2LEjlixZAjc3N0RFRWHGjBno37+/lDVWyazvo5FXpEW3lg6oXYv3TRARVZXeoLh27Rr27dsHuVyOgoICdO/eHYcPH4aTk5OU9VVJVn4R/nPxLhRyGb4d383S5RARWTW9l8fWqlVL94AiW1tbtGjRwipCAgDmhF8FAGwe6wOlwvofskREZEl6RxTx8fEICgrSvb5z506Z1/v37zdvZVUU9usdRETfw5COTdDnRd43QUT0tPQGxYEDB6Sso9r85+JdAMDKER0tXAkR0bNBb1BY40SAxdoSxN7LQgtHO9jZGL3yl4iITGDWA/gnT55EQEAA/P39sXHjxgrbnD9/HsHBwQgMDMSbb775VNv7Q50DTaEWY15p8VTrISKiJ8z2s1ur1WLx4sXYsmULXFxcMGLECPj5+aF169a6NllZWVi0aBE2bdqEpk2bIjU19am2ueP8bQBAuyYNnmo9RET0hEkjivz8fMTHx1dqxdHR0WjevDmaNWsGGxsbBAYG4ujRo2Xa7N+/H/7+/mjatCkAwNHRsVLb+Ktz8amQyYDurRyeaj1ERPSE0aA4duwYgoODMX78eADAjRs38O677xpdsVqthqurq+61i4sL1Gp1mTYJCQnIysrCmDFjoFKpnupu78S0XMQ/0GBq39aQyTj5HxFRdTF66OnLL7/E7t27MWbMGACAl5cXkpKSjK5YCFHuvb/uwLVaLWJiYrB161bk5+dj1KhR6NSpE1q2bFmmXVhYGMLCwgAA6enpFW7vi2N/AgA6utsbrY2IiExnNCgUCgXq169f6RW7urqWmY5crVbD2dm5XJtGjRrBzs4OdnZ28PHxQWxsbLmgCA0NRWhoKABApVJVuL3kjHwAQP92fIIdEVF1MnroydPTE/v374dWq0VCQgI+/vhjdO7c2eiKO3TogISEBCQmJqKwsBARERHw8/Mr06Zfv364ePEiiouLkZeXh+joaHh4eFTpi1xNykQn94ZV+iwREelndEQxb948rF+/HjY2Npg+fTp69uyJyZMnG1+xUon58+dj/Pjx0Gq1CAkJgaenJ3bt2gUAGD16NDw8PODr64uhQ4dCLpdjxIgRaNOm8o8rvZ2qQWZeEV5q3qjSnyUiIsNkoqKTCf/j+vXraNeunVT1GKVSqRAeHl7mvfd2XkZE9D0c/IcvvHhpLBFRORXtO01ldESxfPlyPHjwAAMHDkRgYCA8PT2rtCFzOnD1HgAwJIiIzMBoUGzfvh0PHjzAwYMHMW/ePGg0GgwaNMikw09SuJ6cBSGAQX9zNd6YiIgqzaQb7pycnPDWW29h0aJFaNu2LdatW2fuuky2+1LpJIB/f7WFZQshInpGGR1R3Lx5EwcOHMBPP/0Ee3t7DB48GLNnz5aiNpNk5xcBADq/wBPZRETmYDQo5syZg8DAQGzevBkuLjXvHoXE9Fx4NWkAGyUfUEREZA5Gg+I///mPFHVUSUmJwLWkLPRtywcUERGZi96g+Mc//oHPP/+8zFPt/ldNeMLdb3czkFNQjFdaPd1kgkREpJ/eoJg7dy4AYP369ZIVU1mRvz8AwNliiYjMSe+B/cfzMu3cuRNubm5l/tm5c6dkBRry5/1sAMALDnYWroSI6Nll9AzwmTNnyr138uRJsxRTWbdTc+FQ1wZKBU9kExGZi95DTzt37sSuXbuQmJhY5jyFRqPBSy+9
JElxxtxNz0NHTgRIRGRWeoMiKCgIvXr1wmeffYbp06fr3q9bty7s7e2lqM2ggmItMvOKeNiJiMjM9AaFTCaDu7s75s+fX25ZRkaGxcPizM3S52u/6Fr5Z2UQEZHp9AbF9OnTsWHDBqhUKshksjJPrJPJZOWefy21xLRcAIBPc17xRERkTnqDYsOGDQBKn5ldE916qAEAODewtXAlRETPNqOXC126dAm5uaW/3vfu3Yvly5cjOTnZ7IUZcz4+DQDgWNfGwpUQET3bjAbFwoULUadOHcTGxmLTpk1o2rQpZs6cKUVtBqVqClDXRgGZTGbpUoiInmlGg0KpVEImk+HIkSN46623MHbsWGg0Gilq0ytdUwh1VgF6tG5s0TqIiJ4HRoOibt262LBhA/bt24c+ffpAq9WiuLhYitr0OhtfesXTgPZ8WBERkbkZDYrVq1fDxsYGy5Ytg5OTE9RqNcaNGydFbXrF3ssCAPRqwxEFEZG5GQ0KJycnBAUFITs7G8ePH4etrS2GDRsmQWn6qbMKUEshg3P92hatg4joeWA0KA4cOICRI0fi0KFDOHjwoO5vS4pKTIdTPV4WS0QkBaMPLlq/fj12794NR8fSZz6kpaXh73//OwYOHGj24vT5Q52DN7q9YLHtExE9T4yOKIQQupAAAHt7+zJ3aVuKtsTyNRARPQ+Mjih69uyJcePGITAwEEDpoahevXqZvTB9Sh6FVCunuhargYjoeWI0KGbNmoXDhw/j0qVLEEIgNDQU/v7+UtRWoeJHI4kiLUcURERS0BsUCQkJWLFiBRITE9GmTRvMmjULLi4uUtZWoccB4dqAVzwREUlB7zmKDz/8EH379sWaNWvQvn17fPzxx1LWpdfjQ0/ujepYuBIioueD3hGFRqPBa6+9BgBo1aoVhg8fLllRhhQVlwAAnDmiICKShN6gKCgowPXr13VXOOXn55d53b59e2kq/Iv8Ii0AoElDBgURkRT0BoWTkxOWL1+ue924cWPda5lMhm3btpm/ugrIH80WW7uWwiLbJyJ63ugNiu3bt0tZh8m0QqB+baMXaxERUTUxesNdTVOkFSjmpbFERJIxa1CcPHkSAQEB8Pf3x8aNG/W2i46OhpeXl0lzSMllQIM6HFEQEUnFbEGh1WqxePFibNq0CREREfjxxx8RFxdXYbtPP/0UPXv2NG29JYL3UBARScikuZ727t2LL7/8EgCQnJyM6OhooyuOjo5G8+bN0axZM9jY2CAwMBBHjx4t12779u0ICAgoM5+UIdoSgUZ8TjYRkWRMemb2b7/9hoiICAClT7xbtGiR0RWr1Wq4uj55Ap2LiwvUanW5NkeOHMGoUaMMrissLAwqlQoqlQrFWi1sFFZ3aoWIyGoZ3eNGR0djwYIFsLUtff5Dw4YNUVRUZHTFFc0wK3t0aetjS5cuxYwZM6BQGL7UNTQ0FOHh4QgPD4dcrkAtBgURkWSMnhVWKpXQarW6nXxaWhrkcuM7aldXV6SkpOheq9VqODs7l2lz7do1TJs2DQCQnp6OyMhIKJVK9O/fX+96hRBQyGV6lxMRUfUyGhRjxozBe++9h9TUVKxevRqHDh3CBx98YHTFHTp0QEJCAhITE+Hi4oKIiAisWrWqTJtjx47p/p49ezb69OljMCQAoFBbors7m4iIzM9oUAwdOhTt27fHuXPnIITAunXr4OHhYXzFSiXmz5+P8ePHQ6vVIiQkBJ6enti1axcAYPTo0VUqWCGX8a5sIiIJyYSRx9UlJydX+H7Tpk3NUpAxbV/pj9Effo4FQZaZa4qIyBqpVCqEh4dX6bNGRxQTJ07U/V1QUIC7d++iZcuWuqugpCYEoOQ5CiIiyRgNiv3795d5HRMTg7CwMLMVZEyJEFCYcDKdiIiqR6X3uO3bt8fVq1fNUYvJMnILLbp9IqLnidERxZYtW3R/l5SU4Pr163BwcDBrUca84Ghn0e0TET1PjAaFRqPR/a1QKNC7d28EBASYtShj
5DKeoyAikorBoNBqtdBoNJg1a5ZU9ZiEMUFEJB295yiKi4uhUChw/fp1KesxCUcURETS0TuiGDlyJH744Qd4eXnh3XffxcCBA2Fn9+TcwIABAyQpsCLMCSIi6Rg9R5GZmYlGjRrh/PnzZd63ZFBwREFEJB29QZGamootW7bA09MTMpmszGywf50FVmrMCSIi6egNipKSkjJXPNUkHFEQEUlHb1A4OTlhypQpUtZiMs7gQUQkHb1XPRmZK5CIiJ4TeoNi69atEpZRSTz0REQkGb1BYW9vL2EZleNS39bSJRARPTeschpWbQkPixERScUqg6JebaO3fxARUTWxyqDgo1CJiKRjlUHB+yiIiKRjlUGh4I0URESSsc6g4IiCiEgyVhkUfGQ2EZF0rHKXy3MURETSscqgqGfLy2OJiKRilUEh58lsIiLJWGdQMCeIiCRjpUHBpCAikgqDgoiIDLLSoLB0BUREzw8rDQomBRGRVKwzKDikICKSjFUGRR3OHktEJBmzBsXJkycREBAAf39/bNy4sdzyffv2ISgoCEFBQRg1ahRiY2PNWQ4REVWB2YJCq9Vi8eLF2LRpEyIiIvDjjz8iLi6uTBt3d3fs2LED+/fvx6RJkzBv3jxzlUNERFVktqCIjo5G8+bN0axZM9jY2CAwMBBHjx4t0+all15Cw4YNAQDe3t5ISUkxad08l01EJB2zTZqkVqvh6uqqe+3i4oLo6Gi97Xfv3o1evXpVuCwsLAxhYWHVXiMRERlntqAQQpR7T6ZnKHDu3Dns3r0bO3furHB5aGgoQkNDAQCe3fqBAwoiIumYLShcXV3LHEpSq9VwdnYu1y42NhYfffQRvv76azRq1Mhc5RARURWZ7RxFhw4dkJCQgMTERBQWFiIiIgJ+fn5l2iQnJ2Pq1KlYuXIlWrZsafK69Y1MiIio+pltRKFUKjF//nyMHz8eWq0WISEh8PT0xK5duwAAo0ePxtq1a5GRkYFFixYBABQKBcLDw81VEhERVYFMVHQyoQbz7NYPsWePQMG7s4mITKZSqar8Q9wq78xmRBARSccqg4KIiKRjlUHBc9lERNKxyqAgIiLpWGVQ8PJYIiLpWGVQEBGRdBgURERkEIOCiIgMYlAQEZFBDAoiIjKIQUFERAYxKIiIyCAGBRERGWR1QcFb7YiIpGV1QUFERNJiUBARkUEMCiIiMohBQUREBllfUPBsNhGRpKwvKIiISFJWGBQcUhARSckKg4KIiKTEoCAiIoMYFEREZBCDgoiIDLK6oOCpbCIiaVldUBARkbQYFEREZBCDgoiIDGJQEBGRQQwKIiIyiEFBREQGMSiIiMggswbFyZMnERAQAH9/f2zcuLHcciEElixZAn9/fwQFBSEmJsac5RARURWYLSi0Wi0WL16MTZs2ISIiAj/++CPi4uLKtDl58iQSEhJw+PBhfPzxx1i4cKG5yiEioioyW1BER0ejefPmaNasGWxsbBAYGIijR4+WaXP06FEMGzYMMpkM3t7eyMrKwv379w2ul3dmExFJS2muFavVari6uupeu7i4IDo62mAbV1dXqNVqODs7l2kXFhaGsLCw0oJzUqBSqcxVtlVJT09Ho0aNLF1GjcC+eIJ98QT74olbt25V+bNmCwohRLn3ZDJZpdsAQGhoKEJDQwEAKpUK4eHh1VSldWNfPMG+eIJ98QT74omn+YFttkNPrq6uSElJ0b2uaKTw1zYpKSnl2hARkWWZLSg6dOiAhIQEJCYmorCwEBEREfDz8yvTxs/PD3v27IEQAr/99hvq16/PoCAiqmHMduhJqVRi/vz5GD9+PLRaLUJCQuDp6Yldu3YBAEaPHo3evXsjMjIS/v7+qFOnDpYtW2Z0vY8PQRH74n+xL55gXzzBvnjiafpCJio6UUBERPQI78wmIiKDGBRERGRQjQ0KTv/xhLG+2LdvH4KCghAUFIRRo0YhNjbWAlVKw1hfPBYdHQ0vLy8cOnRIwuqkZUpfnD9/
HsHBwQgMDMSbb74pcYXSMdYX2dnZePfddzF06FAEBgbi+++/t0CV5jdnzhy88sorGDJkSIXLq7zfFDVQcXGx6Nevn7hz544oKCgQQUFB4s8//yzT5sSJE2LcuHGipKREREVFiREjRlioWvMypS8uXbokMjIyhBCl/fI898XjdmPGjBHjx48XBw8etECl5mdKX2RmZopBgwaJpKQkIYQQDx8+tESpZmdKX3z11Vdi5cqVQgghUlNTRZcuXURBQYElyjWrCxcuiGvXronAwMAKl1d1v1kjRxTmmv7DGpnSFy+99BIaNmwIAPD29i5zb8qzxJS+AIDt27cjICAAjo6OFqhSGqb0xf79++Hv74+mTZsCwDPbH6b0hUwmg0ajgRACGo0GDRs2hFJptos+LaZLly66fUFFqrrfrJFBUdH0H2q12mCbx9N/PGtM6Yv/tXv3bvTq1UuK0iRn6r8XR44cwahRo6QuT1Km9EVCQgKysrIwZswYqFQq7NmzR+IqpWFKX7zxxhu4efMmfH19MXToUMydOxdyeY3c/ZlVVfebNTJSRTVO/2HtKvM9z507h927d2Pnzp3mLssiTOmLpUuXYsaMGVAoFFKVZRGm9IVWq0VMTAy2bt2K/Px8jBo1Cp06dULLli2lKlMSpvTF6dOn4eXlhW3btuHOnTt4++234ePjg3r16klVZo1Q1f1mjQwKTv/xhCl9AQCxsbH46KOP8PXXXz+zk6CZ0hfXrl3DtGnTAJROCBcZGQmlUon+/ftLWqu5mfrfSKNGjWBnZwc7Ozv4+PggNjb2mQsKU/oiPDwcEyZMgEwmQ/PmzeHu7o74+Hh07NhR6nItqqr7zRo59uL0H0+Y0hfJycmYOnUqVq5c+cztBP6XKX1x7Ngx3T8BAQFYsGDBMxcSgGl90a9fP1y8eBHFxcXIy8tDdHQ0PDw8LFSx+ZjSF02aNMHZs2cBAA8fPsStW7fg7u5uiXItqqr7zRo5ojDX9B/WyJS+WLt2LTIyMrBo0SIAgEKheCZnzDSlL54XpvSFh4eH7pi8XC7HiBEj0KZNGwtXXv1M6YvJkydjzpw5CAoKghACM2bMgIODg4Urr37Tpk3DhQsXkJ6ejl69emHq1KkoLi4G8HT7TU7hQUREBtXIQ09ERFRzMCiIiMggBgURERnEoCAiIoMYFEREZBCDgmokLy8vBAcH6/65e/eu3radO3d+6u3Nnj0bfn5+CA4OxvDhwxEVFVXpdcydOxdxcXEAgPXr15dZVl1TijzulyFDhuDdd99FVlaWwfY3btxAZGRktWybnl+8PJZqpM6dO5u8s65MW31mz56NPn36YODAgTh9+jRWrFiB/fv3V3l91VGTsfXOmjULLVq0wKRJk/S2Dw8Px7Vr1zB//vxqr4WeHxxRkFXQaDQYO3Yshg8fjqCgIBw5cqRcm/v37+ONN97Q/eK+ePEigNJ5fkJDQzF8+HC8//770Gg0BrfVpUsX3LlzBwCwZcsWDBkyBEOGDMHWrVsBALm5uZgwYQKGDh2KIUOG4MCBAwCAMWPG4OrVq/j000+Rn5+P4OBgTJ8+HcCTUc8HH3xQ5hf+7Nmz8dNPP0Gr1WLFihUICQlBUFAQvvvuO6N94u3trZvQLTo6GqNGjcKwYcMwatQoxMfHo7CwEGvWrMGBAwcQHByMAwcOIDc3F3PmzEFISAiGDRtWYT8SlfNUk58TmUnbtm3F0KFDxdChQ8XkyZNFUVGRyM7OFkKUPk+gf//+oqSkRAghhLe3txBCiM2bN4t169YJIUqfUZCdnS1SU1PF66+/LjQajRBCiA0bNogvvvii3PZmzZqle3bFgQMHxIgRI8TVq1fFkCFDhEajETk5OWLw4MEiJiZGHDp0SMydO1f32aysLCGEEG+++aaIjo4uU9Njj18fPnxYzJw5UwghREFBgejVq5fIy8sT3333nVi7dq3u/eHDh4s7d+6Uq/PxeoqLi8XUqVNFZGSkEEKI7OxsUVRUJIQQ
4pdffhFTpkwRQgjx/fffi0WLFuk+v2rVKrFnzx4hROnzKgYMGKDrGyJ9auQUHkS1a9fG3r17da+Liorw2Wef4ddff4VcLodarcbDhw/h5OSka9OhQwd8+OGHKC4uRv/+/eHl5YXjx48jLi5ON71HUVERvL29K9zmypUr8dVXX8HBwQFLly7F2bNn0b9/f9jZ2QEA/P39cfHiRfj6+mLFihX45JNP0LdvX/j4+Jj8vXr16oUlS5agsLAQJ0+ehI+PD2rXro1ffvkFv//+O3766ScApU9ku337Npo1a1bm849HKklJSWjfvj169Oihaz9r1izcvn0bMpkMRUVFFW7/9OnTOHbsGL755hsAQEFBAe7du/dMzgFF1YdBQVZh//79SEtLQ3h4OGrVqgU/Pz8UFBSUadOlSxfs2LEDkZGRmDlzJsaNG4cGDRqgR48e+Oyzz4xuY+bMmRg4cKDu9ZkzZyps17JlS4SHhyMyMhKrVq1Cjx49MGXKFJO+h62tLbp27YpTp07h4MGDCAwMBFA6/fNHH30EX19fg59/HKDZ2dmYOHEivv32W7z11lv4/PPP0a1bN6xduxZ3797FW2+9pXcda9asQatWrUyqlwjgOQqyEtnZ2XB0dEStWrVw7tw5JCUllWuTlJQER0dHvPbaawgJCUFMTAy8vb1x+fJl3L59GwCQl5eHW7dumbTNLl264MiRI8jLy0Nubi6OHDkCHx8fqNVq1KlTB8HBwRg3bhyuX79e7rNKpVLvr/rAwECEh4fj4sWL6NmzJwCgZ8+e2LVrl+4zt27dQm5urt7a6tevj48++gjffPMNioqKkJ2dDRcXFwDADz/8oGtXt27dMudkevbsiR07duieS1BR7UR/xREFWYWgoCBMmjQJKpUKXl5eFf4ivnDhAjZv3gylUgk7OzusWLECDg4OWL58OaZNm4bCwkIApSeUTZmOvX379lCpVBg5ciQAYMSIEWjXrh1OnTqFlStXQi6XQ6lUYuHCheU++9prr2Ho0KFo164dVq1aVWZZjx49MGvWLPj5+cHGxgYAMHLkSCQlJUGlUkEIgUaNGmHdunUG62vXrh3atm2LiIgIjB8/HrNnz8aWLVvQvXt3XZtu3bph48aNCA4OxsSJEzF58mQsW7YMQ4cOhRACbm5u2LBhg9G+oOcbL48lIiKDeOiJiIgMYlAQEZFBDAoiIjKIQUFERAYxKIiIyCAGBRERGcSgICIig/4/j7xZY6u4tMoAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-b9477826fb507d36.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-f814e2d804a22203.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-24ae0c22f739e6fa.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-7447dd57147cebd3.arrow\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "****** Crossval split: 2/4 ******\n",
      "\n",
      "Filtering training data\n",
      "Filtered 35%; 32462 remain\n",
      "\n",
      "Filtering evalation data\n",
      "Filtered 52%; 24113 remain\n",
      "\n",
      "Labeling training data\n",
      "Labeling evaluation data\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-3d0888fca1887e80.arrow\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Labeling evaluation OOS data\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']\n",
      "- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "Some weights of BertForTokenClassification were not initialized from the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
      "<ipython-input-15-21e1cede4c54>:45: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='834' max='834' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [834/834 01:33, Epoch 1/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>100</td>\n",
       "      <td>0.645900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>200</td>\n",
       "      <td>0.582800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>300</td>\n",
       "      <td>0.461700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>400</td>\n",
       "      <td>0.350200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>500</td>\n",
       "      <td>0.262800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>600</td>\n",
       "      <td>0.180400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>700</td>\n",
       "      <td>0.140900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>800</td>\n",
       "      <td>0.109600</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-8e85e7414566994a.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-e2704cdfc217c3e3.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-e213b038886d7cd4.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-d6c9eba9fe9ffafc.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-442181417de57bb6.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-0d8563be811b9c30.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-85690e0bf5863858.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-3bdda0a32e054f19.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-3abe0ffb170c29f0.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-b132478871346000.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-09db8f6a69301008.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-34ae599619e2ced6.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-c74b97625f913f63.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-228b6002a6690208.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-d644cc9c55478a2a.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-d3d097800ebd687c.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-2e536900ba2b88cc.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-0434f2adbb78af27.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-926036de71570e84.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-d7f012de8332824e.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-57a002ae2aa9ba42.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-0476d5fed302e1c5.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-69341790285e8ce2.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-ee190fa69ba78df3.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-4b3dc879e23e8e63.arrow\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEWCAYAAAB42tAoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAvH0lEQVR4nO3deXhU5f338fcs2ROykQUCshlkKYoKLpVFAxE0hEgAA1X018JVxUp/rfIIuOCuF7RqtdUCReEBFfN7MKIILgUVtCgKopHtVxHCngAhIckkmfU8fwRCIyQTlpnJhM/rurjMmbnnnO/cV3s+c7b7NhmGYSAiItIIc6ALEBGRlk1BISIiTVJQiIhIkxQUIiLSJAWFiIg0SUEhIiJNUlCIiEiTrIEuQKQly8jI4MiRI1gsFiIjIxk4cCCPPPIIUVFRAHz77bf85S9/4YcffsBsNtO/f3+mTp3KxRdfXL+OqqoqXnzxRf75z39y7Ngx2rZty/XXX8/kyZNJSEgI1FcTaTYdUYh4MWfOHDZt2sSyZcvYunUr8+bNA2DTpk1MnDiRIUOG8Pnnn7N69WouueQSxo8fz969ewFwOBzceeed7Nixg/nz57Nx40beeust4uLi+OGHHwL5tUSaTUcUIs2UlJTEgAED2LZtGwB/+tOfyMnJ4c4776xv88c//pEtW7bw17/+ldmzZ/Puu+9y8OBBFi1aVH8UkpiYyO9+97uAfAeRs6EjCpFmKi4u5vPPP+eiiy6ipqaGTZs2MXz48FPa3XTTTaxbtw6AdevWMXDgwPqQEAlGOqIQ8eLEr//q6mquueYafv/733Ps2DE8Hg9JSUmntE9KSqKsrAyA8vJyevfu7dd6Rc43HVGIePHyyy+zadMmFi9ezM6dOykrK6NNmzaYzWYOHz58SvvDhw8THx8PQFxc3GnbiAQTBYVIM1111VXk5uYya9YsIiMj6du3Lx9++OEp7T744AOuueYaAH75y1/yxRdfUF1d7e9yRc4bBYXIGbjzzjtZt24d27Zt4/7772fZsmUsWrSIqqoqjh07xgsvvMB3333HvffeC0BOTg6pqalMmTKFn376CY/HQ1lZGXPmzGHNmjUB/jYizaOgEDkDCQkJ5OTk8Morr9CvXz/mz5/PP//5TwYOHMgNN9zAtm3bePPNN+ncuTMAoaGhLFy4kK5du/Kb3/yGK6+8krFjx1JWVsall14a2C8j0kwmTVwkIiJN0RGFiIg0SUEhIiJNUlCIiEiTFBQiItKkoHsy++qrryYtLS3QZYiIBJX9+/ezfv36s/ps0AVFWloaBQUFgS5DRCSo5ObmnvVndepJRESapKAQEZEmKShERKRJCgoREWmSgkJERJqkoBARkSb5LChmzJjBtddey4gRI077vmEYPPXUU2RmZpKdnc2WLVt8VYqIiJwDnwVFbm4u8+fPb/T9tWvXUlRUxMcff8yTTz7JY4895qtSRERaPcMw8HgM3B4Dl9uDw+XB7nJT66z7dy589sBd//792bdvX6Pvr169mltuuQWTyUTfvn2pqKjg0KFDJCcn+6okEblAeTwG1cd3mBU1TpxuA6fbg+v4TrWkwo7FbKrbyXo8x3e2BgeP1RIVZsHh9vDTIRttIqwYxvGdsgGe4/+tWz7xd8Pl3aU2kmLCcLgNnC4P+8trCLGYMJtMuI/v3F3Hd/Bl1Q5qnR4iQy2YTSY8hlG3Pk78t27dDf6mbpveXHEO/RewJ7NLSkpITU2tX05NTaWkpOS0QZGfn09+fj5A/aT1InJhqLK7+HdJJTsOVeF0e6hxuNldWk1kmAW708OOQ1WU2hyEWc24PB5cx0Pgp8M2YiNCcLg81JzjL+r/1Cbcitlct6M3m8B0/L9mkwkTx5fNHH/fBEBRaTVd2kYRajHTMSGCw5V2uraNxmIxYTWbsJhMWMwmrBYTZTYnidGhhIdYjq+vbp0n1m0ycfJ16rZN/fsnXzvxOY6/
vurfZ/+dAxYUp5sv6cSX+rm8vDzy8vKAc3sMXUT8y+5yc6zGid3p4XCVnVqHm71l1RyrcXKgvJZ9ZTXsLrURHW6t38HvPGwjNjIEp9tDebWzyfW3CbcSarXg9nhoExHCxUnRWC0mQixm+qTFUlHromvbKKwWMwYGHeIicHsM2kSEEBVmJcRiwmquOwMfGxFCRKilbodtPr7jNpuJCLEQFmImxGLGYj79PioYrHrp7D8bsKBITU2luLi4frm4uFinnURaOMMwqLS7OFxpp8zmoNTmYO/RavaV1fDD/mO4PQaHKmo5XGXH7ak79eJNWlwEbsOgc2IUIRYTFydHU17tpFtSFGEhFuxON13aRnFJahs6xEfQJjyEsBAzYVZzoz8u5fwKWFBkZGTw+uuvk5WVxffff09MTIyCQiRAnG4PWw5UcLC8hsNVdmocbg4eq2XnERtVtU7MJhPbiyupsrtO+/lQq5mk6DAAUmPD+UVaLCEWM5ekxuBye+iYEEmo1UxMuJXkmHBiI0JIigkjPMTiz68pZ8lnQXHffffx9ddfU1ZWxqBBg5gyZQouV93/yMaPH8/gwYNZs2YNmZmZRERE8Mwzz/iqFJELksvtwWZ3U17joKzaSfGxGqrsboqP1VDr9LDriI3dR23sPGyjxuk+5YKoxWyiXWw4bo9BWlwE13RNwOk26JYUTfu4cDolRhEZauGS1BgSo0L1674V81lQPP/8802+bzKZePTRR321eZFWwTAM7C4PlbUuSipqOVBeg9NtYHe5KamwH78rxqDW6eGnw1W4PQbfFB3F4fJgc3i/gNs+NpxruibSLjac/p0T6Nw2ivax4USFWQkPsQT1OXk5f4JuPgqRYOb2GNgcLipqnJRXO9lXVsPWA8cwmUyUVTv4fm85e8tqCLWYKa6oPaN1m00QEWLhys4JmE3QPSWG8BALaXHhJEaFERNuJTLUSkpsGPGRoYRYNDCDNI+CQuQ8KK92cKTKQfGxWnaV2vjf4goOV9oprXJQZXdRanNQUePE7vI0uo6IEAvt4sKJiwghISqUG3unUGV30S0pmogQC21jwkiMqrttsm10KGFWCxEhFiLDLFjNJp36EZ9RUIg0otbp5lCFnbJqB/vLa3B7DHaX2rBazGw/WEFRaTWlNjt2p4dDlfYGnw2xmLgoIZLE6DA6JkTyi7RYYiNCsJhNhFvNpMZGYDZBekoMHeMjSIwO02keabEUFNKquT0GlbVOKmtd2F0equwuKmud7C+rwen2sL+8Ftvx145UOdheXElEqJlqu5tSm8Pr+jvER9CrfRtuToziktQYEqJCuSghku4pMdrxS6uhoJCgYRh1Qx04j9/Nc6TKzqFKOzsPV7G7tBqPYfD9vmMcqbRjd3k4UmX3uk6z6fiDViEW2kSE0Kt9G6pqnfRIb0NsRAgd4iMIsZhJi4sgMTqU6DArCVGhRIRYdKpHLhgKCvG7E3fy7D1azfbiSoqO2NhxfGcfajGzvbiC+KhQXG4Dh9vD4UrvO3yoO93T/vgOvXtKDNFhVmqdbi5Ojsbh9pAWF0GY1YLJVPeQV0JUKEkxYbqoK+KFgkLOK4/HYH95DT8eqmR/WQ0/HbZRVGqjxuGmuKKW3aXVjX42MtRC+7gILusYh9PtoUN8JCEWMyEWE0dtDromRRNqMWG1mDGbICrMSrekaBKjQuncNko7fBEfUVBIs3k8Boer7Gw5cIwt+yv48VAVB4/VcKjSTq3TTbXDTWXt6Z/cTYoJo3/neH7ZrW39r3yA9ORouqfEkHb8FI+ItDwKCgHqTgeVVTupqnWxrbiC0ioHB8pr2HGoip1Hqth52IbrNAP3mE2Q0iac+MhQhvSMJ9Ripk24lX6dE0hPiSYhqu42ThEJXgqKC0yV3cW2gxWs21HKriNVlNc4+X5vOXaXh+qfPclrMkHc8TF5ru2WSEJUKGlxEVzZKZ5OiZF0bRuNWXf2iLR6CopWyOHysLesmkMVdrYdrGDZd/sp
qailpOL0F4W7JUWRHBPO9ZckER8VSpjVTO/2sXRpG6VbPEVEQRHsHC4P63eVsn7nUT7/8TClNgf7ymoatGkfG077uAiG9EwhJtxKx/hIBlzcls5towJUtYgEEwVFkPB4DHYfrWbrgQoOHqvhnU372XKg4pR2l3WIJadve6LCrHROjKJ3+zZclBCpe/5F5KwpKFooh8vD1oMV5H+zh3e/O3DK9YOEqFCuuCiO7ikxpKfEMLh7W7olRSsQROS8U1C0IBW1ThatK2Ltj0co3FdOrbNuALkwq5nMXin0bt+GKzvFc0lqDEnRYQoFEfELBUWAeTwGi74s4pXPfqofWK5TYiS39E3j2m6J/CItlm5J0QGuUkQuZAqKADAMg7U/HiH/mz2s/OHkvOFDe6aQ07c92Ze1D2B1IiINKSj8aF9ZNf93XRErCg9y4FjdpDTdU6LJvaIDEwd00ZPJItIiKSj84EB5DX/M/471u44CEBcZwvSbepB7RRrJMeEBrk5EpGkKCh+x2V0sXFdEwbf7+OmwDYArO8XzwLBLuLprYoCrExFpPgXFeeTxGHyx4wiLvixi/a6jVNa6MJkgo0cy92V25xdpsYEuUUTkjCkozgOPx+C3izfy3d7y+slyLu0Qy38PSeeGS5I1HpKIBDUFxTlYv7OUZz7Yzvd7y+tf+z/DLmHCtZ1oEx4SuMJERM4jBcVZ+J8Ne3lgaWH9cq92bRhzZQd+fV1nPQQnIq2OguIMVNQ6ef7jf7NwXREAN/ZKYWZ2LzrERwa2MBERH1JQNNPnPx5m4v/dgMPlIaNHMn8Z11enl0TkgqCg8KK82sHU/1fIqm0lADw39jJGX9khwFWJiPiPgqIJm/cfI/eVdTjcHn7ZLZHZYy7VaSYRueAoKE7D7TG48ql/Ul7txGo2cX9md6YMSQ90WSIiAaGg+Jkdh6r4/ZJNlFc7CbWa+Wzq9bSPiwh0WSIiAaOg+A9VdhdDn18DwF2DuzLjpp4BrkhEJPAUFMfVOt38ZuE3ANyX2Z3f61STiAgAPh3Xeu3atQwbNozMzEzmzZt3yvuVlZXcfffdjBw5kqysLN5++21fltMop9vD0OfX8PWuo9zYK0UhISLyH3x2ROF2u3niiSdYsGABKSkpjBkzhoyMDC6++OL6Nm+88QbdunVjzpw5HD16lOHDh5OdnU1oaKivyjqFYRj8ZuE37CurYfL13Zg2vIffti0iEgx8dkRRWFhIp06d6NixI6GhoWRlZbF69eoGbUwmEzabDcMwsNlsxMbGYrX692zYy5/u4PMfj5CeHM0Dwy7x67ZFRIKBz/bKJSUlpKam1i+npKRQWFjYoM1tt93G5MmTGThwIDabjRdeeAGz+dTsys/PJz8/H4CysrLzVuOHm4v588f/Jj4yhA//MEjjNImInIbPgsIwjFNe+/mO+IsvvqBnz54sWrSIPXv28Otf/5p+/foRHR3doF1eXh55eXkA5Obmnpf6io/VcvfrGwF4557rsGgocBGR0/LZqafU1FSKi4vrl0tKSkhOTm7QpqCggBtvvBGTyUSnTp3o0KEDO3fu9FVJ9dbvLGXQ7E8BePDmHnRuG+XzbYqIBCufBUWfPn0oKipi7969OBwOVqxYQUZGRoM27dq148svvwTgyJEj7Nq1iw4dfDuO0sbdZeTN+wqH28Nr/9WP3w7q5tPtiYgEO5+derJarcycOZNJkybhdrsZPXo06enpLFmyBIDx48dzzz33MGPGDLKzszEMg6lTp5KQkOCrkgD4/ZJNALz8qyvI6JHi022JiLQGPr3FaPDgwQwePLjBa+PHj6//OyUlhddee82XJTQw//Od7C+v4VdXX0TWpe38tl0RkWDm0wfuWpIqu4u/frIDgEeyegW4GhGR4HHBBMVD7/zAsRonr97Zj4hQS6DLEREJGhdEUOw4VMm73x3g5j6pDOmp6xIiImfiggiKOWvqbrm9L7N7gCsREQk+rT4o9pVVs3TjPnq2a8PFyTGB
LkdEJOi0+qD4dPshAB4YrnGcRETORqsOiopaJ4++t4WYcCvXdk0MdDkiIkGpVQfFP9buxGPAIyN6ER6iO51ERM5Gs4Oiurral3WcdzsOVfHKZz9xWYdYbu3XMdDliIgELa9B8e2333LzzTdz8803A7B9+3Yee+wxX9d1zhZ9WYTbY/CXcZcHuhQRkaDmNSieffZZXn31VeLi4gDo0aMHGzZs8HVd5+yDzcWktgmni0aGFRE5J8069dSuXcNxkU43uVBL8u+SSg5X2rm5j8ZzEhE5V14HBWzXrh3ffvstJpMJh8PB4sWL6datZQ/NveBfRQD86uqLAluIiEgr4PXQ4LHHHuONN96gpKSEwYMHs23bNh599FF/1HZWPB6DJV/v4bIOsVycHO39AyIi0iSvRxS7du3iueeea/Daxo0bufLKK31W1Ln46XAVAD1S2wS4EhGR1sHrEcVTTz3VrNdairU/HgHgpj6pAa5ERKR1aPSIYtOmTWzatImjR4+yYMGC+terqqpwu91+Ke5s5H+zh7bRYQxKTwp0KSIirUKjQeF0OqmursbtdmOz2epfj46O5qWXXvJLcWfqUGUt/y6p4q5BXTGbTYEuR0SkVWg0KK666iquuuoqRo0aRVpamj9rOmvrdx4FoF9n3867LSJyIfF6MTsiIoJZs2axY8cO7HZ7/euLFi3yaWFn4/WvdgPQq70uZIuInC9eL2ZPnTqVrl27sm/fPu69917S0tLo06ePP2o7YzsOVREdZiUtLiLQpYiItBpeg6K8vJyxY8ditVq56qqrePbZZ/n+++/9UdsZOVxpp9Tm0ACAIiLnmddTT1ZrXZPk5GQ+++wzkpOTKS4u9nlhZ+r7veUADExvG9hCRERaGa9BMXnyZCorK5k2bRpPPvkkNpuNBx980B+1nZGNe8oASE/R09giIueT16C44YYbAIiJiWHx4sVA3ZPZLc3m/ccAdH1CROQ8azQo3G43H3zwASUlJQwcOJDu3bvz6aefMnfuXGpra1m2bJkfy/Tuuz3lRIRYMJn0/ISIyPnUaFA89NBDHDx4kEsvvZSnnnqKtLQ0Nm3axNSpUxk6dKg/a2yWSruLDvE6mhAROd8aDYrNmzfz3nvvYTabsdvtXHPNNXz88cckJbW8oTHsrrohRa7qogftRETOt0Zvjw0JCamfoCgsLIzOnTu3yJAAqLbXBcUlKTEBrkREpPVp9Ihi586dZGdn1y/v2bOnwfLy5ct9W9kZOFxV98R4m4iQAFciItL6NBoUK1eu9Gcd5+TEHU9dNT+2iMh512hQBMtAgABFpdUAmtFORMQHvA7hcS7Wrl3LsGHDyMzMZN68eadts379enJycsjKyuL2228/q+28//0BurSNIiEq9FzKFRGR0/D6wN3ZcrvdPPHEEyxYsICUlBTGjBlDRkYGF198cX2biooKHn/8cebPn0/79u0pLS09q20VV9RyZad4PUMhIuIDzTqiqK2tZefOnWe04sLCQjp16kTHjh0JDQ0lKyuL1atXN2izfPlyMjMzad++PQCJiYlntA2AI1V2qh1urrtYYzyJiPiC16D45JNPyMnJYdKkSQBs27aNu+++2+uKS0pKSE09OW91SkoKJSUlDdoUFRVRUVHBhAkTyM3NPaunvbceqKhbf5uwM/6siIh45/XU09/+9jeWLl3KhAkTAOjZsyf79+/3umLDME557eenhtxuN1u2bGHhwoXU1tYybtw4LrvsMrp06dKgXX5+Pvn5+QCUlZU1eG9DUd2sdunJeoZCRMQXvAaFxWIhJubMd8KpqakNhiMvKSkhOTn5lDbx8fFERkYSGRlJv3792L59+ylBkZeXR15eHgC5ubkN3jticwDQq51mtRMR8QWvp57S09NZvnw5breboqIinnzySS6//HKvK+7Tpw9FRUXs3bsXh8PBihUryMjIaNBmyJAhbNiwAZfLRU1NDYWFhXTr1u2MvsCxGmfdFzHrQraIiC94PaJ45JFH
mDNnDqGhodx///0MGDCAe+65x/uKrVZmzpzJpEmTcLvdjB49mvT0dJYsWQLA+PHj6datGwMHDmTkyJGYzWbGjBlD9+7dz+gLuN0GFyVEntFnRESk+UzG6S4m/IetW7fSq1cvf9XjVW5uLgUFBfXL1//pU0KtZj7+4+AAViUi0rL9fN95JrweUTz77LMcPnyY4cOHk5WVRXp6+lltyFfaRITgdDeZdSIicg68XqNYvHgxixcvJiEhgUceeYTs7GxeeeUVf9TWLNsPVtJJp55ERHymWQ/cJSUlcccdd/D444/To0ePFhUUUWEWbA5XoMsQEWm1vJ56+umnn1i5ciUfffQRcXFx3HzzzUyfPt0ftTVLWbVT81CIiPiQ16CYMWMGWVlZvPrqq6SkpPijpmarPn4kYXd5AlyJiEjr5TUo/ud//scfdZyVb3eXA9Cvc3xgCxERacUaDYr//u//5sUXX2wwq91/agkz3BVX1ALQLUnzUIiI+EqjQfHQQw8BMGfOHL8Vc6a2HawbELBDfESAKxERab0avevpxLhMb775JmlpaQ3+vfnmm34rsCk7D1cBEBepCYtERHzF6+2x69atO+W1tWvX+qSYM1VcYdfRhIiIjzV66unNN99kyZIl7N27t8F1CpvNxhVXXOGX4rxxezz0SNWtsSIivtRoUGRnZzNo0CCef/557r///vrXo6KiiIuL80dtXrncBhGhPpvNVUREaOLUk8lkokOHDsycOZOoqKj6fwDl5eX+qq9JO4/YCLFoeHEREV9q9Of4/fffz9y5c8nNzcVkMjWYsc5kMp0y/3UghFnNlFc7A12GiEir1mhQzJ07F6ibM7ulMgzoruE7RER8yutdTxs3bqS6uhqAd999l2effZYDBw74vDBvDMPA4fYQZm3WuIYiInKWvO5lH3vsMSIiIti+fTvz58+nffv2PPDAA/6orUlHqurmyna4Nc6TiIgveQ0Kq9WKyWRi1apV3HHHHdx5553YbDZ/1Nak3aV1NXRO1FwUIiK+5DUooqKimDt3Lu+99x7XX389brcblyvw8z/sOlIXFF01zpOIiE95DYoXXniB0NBQnnnmGZKSkigpKWHixIn+qK1JnuN3YbWLDQ9wJSIirZvXoEhKSiI7O5vKyko+/fRTwsLCuOWWW/xQWtO+/KkUqJszW0REfMdrUKxcuZKxY8fy4Ycf8sEHH9T/HWhf7TxKqMVMm3AFhYiIL3kd/2LOnDksXbqUxMREAI4ePcp//dd/MXz4cJ8X1xSTCXq1bxPQGkRELgRejygMw6gPCYC4uLgGT2kHysFjtZqwSETED7weUQwYMICJEyeSlZUF1J2KGjRokM8La4rHUxdUUWGWgNYhInIh8BoU06ZN4+OPP2bjxo0YhkFeXh6ZmZn+qK1RJx6yS2mjO55ERHyt0aAoKipi1qxZ7N27l+7duzNt2jRSUlL8WVujKmvrnuMwaeBYERGfa/QaxYMPPsgNN9zASy+9RO/evXnyySf9WVeTKmrrRoyN0R1PIiI+1+gRhc1m49ZbbwWga9eujBo1ym9FeXOwvBaANuGatEhExNca3dPa7Xa2bt1af4dTbW1tg+XevXv7p8Im6BqFiIjvNRoUSUlJPPvss/XLbdu2rV82mUwsWrTI99U1wuWpu5gdYtEQ4yIivtZoUCxevNifdZwRl7vuqEbToIqI+F5Q/iS3OeruerKag7J8EZGg4tM97dq1axk2bBiZmZnMmzev0XaFhYX07Nmz2WNIHa60AxAWoqAQEfE1n+1p3W43TzzxBPPnz2fFihW8//777Nix47Tt/vznPzNgwIBmr7uipu722PaxEeetXhEROb1mjfX07rvv8re//Q2AAwcOUFhY6HXFhYWFdOrUiY4dOxIaGkpWVharV68+pd3ixYsZNmxYg/GkvCmvcRITbiUiVEN4iIj4WrPmzP7uu+9YsWIFUDfj3eOPP+51xSUlJaSmptYvp6SkUFJSckqbVatWMW7c
uCbXlZ+fT25uLrm5uZSVleF0G4RZFRIiIv7gNSgKCwt59NFHCQsLAyA2Nhan0+l1xacbYdb0szE3nn76aaZOnYrF0vROPy8vj4KCAgoKCoiPj8fp9hCqO55ERPzC66PNVqsVt9tdv5M/evQo5mbcbZSamkpxcXH9cklJCcnJyQ3abN68mfvuuw+AsrIy1qxZg9VqZejQoU2ue8uBCiwKChERv/AaFBMmTOB3v/sdpaWlvPDCC3z44Yf84Q9/8LriPn36UFRUxN69e0lJSWHFihU899xzDdp88skn9X9Pnz6d66+/3mtIAMRGWNl3tNprOxEROXdeg2LkyJH07t2br776CsMweOWVV+jWrZv3FVutzJw5k0mTJuF2uxk9ejTp6eksWbIEgPHjx5910W6PQZ8OsWf9eRERaT6T4WW6ugMHDpz29fbt2/ukIG9yc3Nh6P1EhVlZPPHqgNQgIhJscnNzKSgoOKvPej2iuOuuu+r/ttvt7Nu3jy5dutTfBRUIbo+BxaxrFCIi/uA1KJYvX95gecuWLeTn5/usoOZwGwYWzVokIuIXZ/xkdu/evfnhhx98UUuzudw6ohAR8RevRxQLFiyo/9vj8bB161YSEhJ8WpQ3/y6ppHNiVEBrEBG5UHgNCpvNVv+3xWJh8ODBDBs2zKdFeZMQFVo/gqyIiPhWk0Hhdrux2WxMmzbNX/U0y7EaJ92SogNdhojIBaHRaxQulwuLxcLWrVv9WY9XBuB0G+hatoiIfzR6RDF27Fjeeecdevbsyd13383w4cOJjIysf//GG2/0S4E/53DVTYMaEx4SkO2LiFxovF6jOHbsGPHx8axfv77B64EKihPPB/6ifZuAbF9E5ELTaFCUlpayYMEC0tPTMZlMDUaD/fkosP7kPD5fdliIhhkXEfGHRoPC4/E0uOOppTgRV2FWTYMqIuIPjQZFUlIS9957rz9raRa3p+4aRdvo0ABXIiJyYWj0Z7mXsQIDxuGqq6tNhC5mi4j4Q6NBsXDhQj+W0Xwnro60jQoLaB0iIheKRoMiLi7Oj2U0nweDEIsJs8Z6EhHxi6C7IuxyG4RZdceTiIi/BF1QeAyDhChdyBYR8ZegCwrDgMhQHVGIiPhL0AUFoLkoRET8KOiCwuXxYLUEXdkiIkEr6Pa4bo9BRY0z0GWIiFwwgi4ozCYTqW3CA12GiMgFI+iCwgCiw70OeisiIudJ0AUFgFUXs0VE/CbogsIwDN31JCLiR0EXFC6PoSMKERE/Crqg8BhQanMEugwRkQtG0AWF2QQd4iO9NxQRkfMi6IICNLudiIg/BeUeN4BTdouIXHCCLyiMuofuRETEP4IuKAxOznInIiK+59OgWLt2LcOGDSMzM5N58+ad8v57771HdnY22dnZjBs3ju3btzdrvZrdTkTEf3wWFG63myeeeIL58+ezYsUK3n//fXbs2NGgTYcOHXj99ddZvnw5kydP5pFHHmnWuhUTIiL+47OgKCwspFOnTnTs2JHQ0FCysrJYvXp1gzZXXHEFsbGxAPTt25fi4mKv6zUAk65RiIj4jc9G1yspKSE1NbV+OSUlhcLCwkbbL126lEGDBp32vfz8fPLz8+sWDEN3PYmI+JHPgsIwjFNea+xI4KuvvmLp0qW8+eabp30/Ly+PvLw8ALpfPQRdohAR8R+fBUVqamqDU0klJSUkJyef0m779u08/PDD/OMf/yA+Pt7reuvuelJSiIj4i8+uUfTp04eioiL27t2Lw+FgxYoVZGRkNGhz4MABpkyZwuzZs+nSpUuz160jChER//HZEYXVamXmzJlMmjQJt9vN6NGjSU9PZ8mSJQCMHz+el19+mfLych5//HEALBYLBQUF3leuixQiIn5jMk53MaEFS796CL97+u/8YWj3QJciIhI0cnNzm/dD/DSC7slsgPJqZ6BLEBG5YARlUHRLjg50CSIiF4ygDAoNMy4i4j9Bucd1uDyBLkFE5IIRlEGRFhcR6BJERC4Y
QRkUVotujxUR8ZfgDApzUJYtIhKUgnKPqyMKERH/Ccqg0F1PIiL+E5R73MhQS6BLEBG5YARlUGjiIhER/wnKoLAoKERE/CYog8KsoBAR8ZvgDIqgrFpEJDgF5S5XRxQiIv6joBARkSYFZ1AEZdUiIsEpKHe5uutJRMR/gjIoosJ8NtW3iIj8TFAGhYiI+E9QBoUuZouI+E9QBoVyQkTEf4IzKAJdgIjIBSQog0KnnkRE/Ccog0I5ISLiP0EaFEoKERF/CcqgEBER/1FQiIhIk4IuKHTSSUTEv4IuKERExL+CLyh0SCEi4lfBFxRKChERvwq6oFBMiIj4l0+DYu3atQwbNozMzEzmzZt3yvuGYfDUU0+RmZlJdnY2W7Zs8WU5IiJyFnwWFG63myeeeIL58+ezYsUK3n//fXbs2NGgzdq1aykqKuLjjz/mySef5LHHHvNVOSIicpZ8FhSFhYV06tSJjh07EhoaSlZWFqtXr27QZvXq1dxyyy2YTCb69u1LRUUFhw4danK9OvUkIuJfPpsqrqSkhNTU1PrllJQUCgsLm2yTmppKSUkJycnJDdrl5+eTn59fV3BVMbm5ub4qO6iUlZURHx8f6DJaBPXFSeqLk9QXJ+3ateusP+uzoDAM45TXfj5GU3PaAOTl5ZGXlwdAbm4uBQUF56nK4Ka+OEl9cZL64iT1xUnn8gPbZ6eeUlNTKS4url8+3ZHCz9sUFxef0kZERALLZ0HRp08fioqK2Lt3Lw6HgxUrVpCRkdGgTUZGBsuWLcMwDL777jtiYmIUFCIiLYzPTj1ZrVZmzpzJpEmTcLvdjB49mvT0dJYsWQLA+PHjGTx4MGvWrCEzM5OIiAieeeYZr+s9cQpK1Bf/SX1xkvriJPXFSefSFybjdBcKREREjgu6J7NFRMS/FBQiItKkFhsUGv7jJG998d5775GdnU12djbjxo1j+/btAajSP7z1xQmFhYX07NmTDz/80I/V+Vdz+mL9+vXk5OSQlZXF7bff7ucK/cdbX1RWVnL33XczcuRIsrKyePvttwNQpe/NmDGDa6+9lhEjRpz2/bPebxotkMvlMoYMGWLs2bPHsNvtRnZ2tvHjjz82aPPZZ58ZEydONDwej7Fp0yZjzJgxAarWt5rTFxs3bjTKy8sNw6jrlwu5L060mzBhgjFp0iTjgw8+CEClvtecvjh27Jhx0003Gfv37zcMwzCOHDkSiFJ9rjl98fe//92YPXu2YRiGUVpaavTv39+w2+2BKNenvv76a2Pz5s1GVlbWad8/2/1mizyi8NXwH8GoOX1xxRVXEBsbC0Dfvn0bPJvSmjSnLwAWL17MsGHDSExMDECV/tGcvli+fDmZmZm0b98eoNX2R3P6wmQyYbPZMAwDm81GbGwsVqvPbvoMmP79+9fvC07nbPebLTIoTjf8R0lJSZNtTgz/0do0py/+09KlSxk0aJA/SvO75v7vYtWqVYwbN87f5flVc/qiqKiIiooKJkyYQG5uLsuWLfNzlf7RnL647bbb+Omnnxg4cCAjR47koYcewmxukbs/nzrb/WaLjFTjPA7/EezO5Ht+9dVXLF26lDfffNPXZQVEc/ri6aefZurUqVgsFn+VFRDN6Qu3282WLVtYuHAhtbW1jBs3jssuu4wuXbr4q0y/aE5ffPHFF/Ts2ZNFixaxZ88efv3rX9OvXz+io6P9VWaLcLb7zRYZFBr+46Tm9AXA9u3befjhh/nHP/7RagdBa05fbN68mfvuuw+oGxBuzZo1WK1Whg4d6tdafa25/x+Jj48nMjKSyMhI+vXrx/bt21tdUDSnLwoKCvjtb3+LyWSiU6dOdOjQgZ07d3LppZf6u9yAOtv9Zos89tLwHyc1py8OHDjAlClTmD17dqvbCfyn5vTFJ598Uv9v2LBhPProo60uJKB5fTFkyBA2bNiAy+WipqaGwsJCunXrFqCKfac5fdGuXTu+/PJLAI4cOcKuXbvo0KFDIMoNqLPdb7bI
IwpfDf8RjJrTFy+//DLl5eU8/vjjAFgsllY5YmZz+uJC0Zy+6NatW/05ebPZzJgxY+jevXuAKz//mtMX99xzDzNmzCA7OxvDMJg6dSoJCQkBrvz8u++++/j6668pKytj0KBBTJkyBZfLBZzbflNDeIiISJNa5KknERFpORQUIiLSJAWFiIg0SUEhIiJNUlCIiEiTFBTSIvXs2ZOcnJz6f/v27Wu07eWXX37O25s+fToZGRnk5OQwatQoNm3adMbreOihh9ixYwcAc+bMafDe+RpS5ES/jBgxgrvvvpuKioom22/bto01a9acl23LhUu3x0qLdPnllzd7Z30mbRszffp0rr/+eoYPH84XX3zBrFmzWL58+Vmv73zU5G2906ZNo3PnzkyePLnR9gUFBWzevJmZM2ee91rkwqEjCgkKNpuNO++8k1GjRpGdnc2qVatOaXPo0CFuu+22+l/cGzZsAOrG+cnLy2PUqFH8/ve/x2azNbmt/v37s2fPHgAWLFjAiBEjGDFiBAsXLgSgurqa3/72t4wcOZIRI0awcuVKACZMmMAPP/zAn//8Z2pra8nJyeH+++8HTh71/OEPf2jwC3/69Ol89NFHuN1uZs2axejRo8nOzuatt97y2id9+/atH9CtsLCQcePGccsttzBu3Dh27tyJw+HgpZdeYuXKleTk5LBy5Uqqq6uZMWMGo0eP5pZbbjltP4qc4pwGPxfxkR49ehgjR440Ro4cadxzzz2G0+k0KisrDcOom09g6NChhsfjMQzDMPr27WsYhmG8+uqrxiuvvGIYRt0cBZWVlUZpaanxq1/9yrDZbIZhGMbcuXONv/71r6dsb9q0afVzV6xcudIYM2aM8cMPPxgjRowwbDabUVVVZdx8883Gli1bjA8//NB46KGH6j9bUVFhGIZh3H777UZhYWGDmk44sfzxxx8bDzzwgGEYhmG3241BgwYZNTU1xltvvWW8/PLL9a+PGjXK2LNnzyl1nliPy+UypkyZYqxZs8YwDMOorKw0nE6nYRiG8a9//cu49957DcMwjLffftt4/PHH6z//3HPPGcuWLTMMo26+ihtvvLG+b0Qa0yKH8BAJDw/n3XffrV92Op08//zzfPPNN5jNZkpKSjhy5AhJSUn1bfr06cODDz6Iy+Vi6NCh9OzZk08//ZQdO3bUD+/hdDrp27fvabc5e/Zs/v73v5OQkMDTTz/Nl19+ydChQ4mMjAQgMzOTDRs2MHDgQGbNmsWf/vQnbrjhBvr169fs7zVo0CCeeuopHA4Ha9eupV+/foSHh/Ovf/2L//3f/+Wjjz4C6mZk2717Nx07dmzw+RNHKvv376d3795cd9119e2nTZvG7t27MZlMOJ3O027/iy++4JNPPuG1114DwG63c/DgwVY5BpScPwoKCQrLly/n6NGjFBQUEBISQkZGBna7vUGb/v378/rrr7NmzRoeeOABJk6cSJs2bbjuuut4/vnnvW7jgQceYPjw4fXL69atO227Ll26UFBQwJo1a3juuee47rrruPfee5v1PcLCwrjqqqv4/PPP+eCDD8jKygLqhn9++OGHGThwYJOfPxGglZWV3HXXXbzxxhvccccdvPjii1x99dW8/PLL7Nu3jzvuuKPRdbz00kt07dq1WfWKgK5RSJCorKwkMTGRkJAQvvrqK/bv339Km/3795OYmMitt97K6NGj2bJlC3379uXbb79l9+7dANTU1LBr165mbbN///6sWrWKmpoaqqurWbVqFf369aOkpISIiAhycnKYOHEiW7duPeWzVqu10V/1WVlZFBQUsGHDBgYMGADAgAEDWLJkSf1ndu3aRXV1daO1xcTE8PDDD/Paa6/hdDqprKwkJSUFgHfeeae+XVRUVINrMgMGDOD111+vn5fgdLWL/JyOKCQoZGdnM3nyZHJzc+nZs+dpfxF//fXXvPrqq1itViIjI5k1axYJCQk8++yz3HfffTgcDqDugnJzhmPv3bs3ubm5
jB07FoAxY8bQq1cvPv/8c2bPno3ZbMZqtfLYY4+d8tlbb72VkSNH0qtXL5577rkG71133XVMmzaNjIwMQkNDARg7diz79+8nNzcXwzCIj4/nlVdeabK+Xr160aNHD1asWMGkSZOYPn06CxYs4Jprrqlvc/XVVzNv3jxycnK46667uOeee3jmmWcYOXIkhmGQlpbG3LlzvfaFXNh0e6yIiDRJp55ERKRJCgoREWmSgkJERJqkoBARkSYpKEREpEkKChERaZKCQkREmvT/ARqXxLeMr6ssAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-3c8713ea9ca7fcf8.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-c51c509a283b1c08.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-e3bf280f62a1ecd0.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-8a4c3c27f7ce74ce.arrow\n",
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-11b5a95b53a4e86b.arrow\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "****** Crossval split: 3/4 ******\n",
      "\n",
      "Filtering training data\n",
      "Filtered 35%; 32464 remain\n",
      "\n",
      "Filtering evalation data\n",
      "Filtered 53%; 23712 remain\n",
      "\n",
      "Labeling training data\n",
      "Labeling evaluation data\n",
      "Labeling evaluation OOS data\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']\n",
      "- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "Some weights of BertForTokenClassification were not initialized from the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
      "<ipython-input-15-21e1cede4c54>:45: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='834' max='834' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [834/834 01:32, Epoch 1/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>100</td>\n",
       "      <td>0.660300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>200</td>\n",
       "      <td>0.588000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>300</td>\n",
       "      <td>0.465400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>400</td>\n",
       "      <td>0.331400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>500</td>\n",
       "      <td>0.241100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>600</td>\n",
       "      <td>0.168800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>700</td>\n",
       "      <td>0.136600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>800</td>\n",
       "      <td>0.113900</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading cached processed dataset at /n/holyscratch01/xiaoleliu_lab/Users/ctheodoris/datasets/geneformer_corpus_2048_sorted.dataset/cache-c438e6f7f8463bbc.arrow\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6f8a9dd0a5754dec845c0022470a8c96",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "eaa8acd785b34fe8ab7e2853b745bf9c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "55815cca43374fe1867219af483785e4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "46388a65e68440928be961d7ae57bd05",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "17799d65feac4638a0071df44f6432db",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e103daf395794272989c209b32c12afc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "81053043727a4c1dbe23304e5ad6282a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5d1d3f2835b74004b267d67d04c24663",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "14f38354b0354bc187be9db34990fcce",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4e3d47f0ecdc489ca34de778ebfb3021",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5997f34a471f4a918fd32043fc519bb3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "affe20b63e08414cb0863e1f6c1aad18",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fca7f8cafa504738b7eaddd3f7b708fc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "11f299f23b124674ab9e334bdbe09288",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "01a88ef05cb64f24adecfb5674265a02",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2f88e6525cbd486c9f03491a04681283",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8bb884df7370471d986c51c10431ba10",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4b82e5fe600b4270bb6268e68f76d093",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cd15c803ecc34a8d878df577ffd80252",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "246cac7b5a0b4fd799e7e2081badbdbf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fbc93f4256724314a5141ac29062bae9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b38551b3ac134fef8aa0c6ea3b7fa2a0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "16ddc360a6b64906bd3f1d1adcc94efe",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "44b3af87a1794fc09d00dd3743c4705d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEWCAYAAAB42tAoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAuX0lEQVR4nO3df3zNdeP/8cfZZhhjaD8YCZElGiF98qsx1MwYGlep67q4CqVPF24hJUr50lWuui5CxIeiXZeWaEhURkJEY6j8mB9jx6+xOft99v7+sZrWfpxh55yd7Xm/3brdds55nfd57n3L+7n3+33er7fJMAwDERGRErg5O4CIiFRsKgoRESmVikJEREqlohARkVKpKEREpFQqChERKZWKQkRESuXh7AAiFVlISAgXL17E3d0dLy8vunXrxssvv0ytWrUA+OGHH/jnP//JgQMHcHNzo1OnTkycOJE777yzYBnXrl3jnXfe4csvv+Tq1avcdttt9OzZkzFjxlC/fn1n/WoiZaY9ChEbFixYwL59+1izZg2HDh1i0aJFAOzbt4+RI0fSq1cvtm3bxpYtW7jrrrsYPnw4p0+fBiA7O5snn3ySo0ePsnjxYvbu3cvHH3+Mj48PBw4ccOavJVJm2qMQKSNfX1+6du3K4cOHAXjzzTeJiIjgySefLBjz97//nYSEBP71r38xZ84cPvvsM86dO8fy5csL9kIaNGjAM88845TfQeRmaI9CpIySk5PZtm0bt99+OxkZGezbt49+/foVGffwww+zY8cOAHbs2EG3bt0KSkLEFWmPQsSG3/76T09Pp0uXLjz33HNcvXqVvLw8fH19i4z39fUlJSUFgCtXrtCmTRuH5hUpb9qjELFh3rx57Nu3jxUrVnD8+HFSUlKoU6cObm5uXLhwocj4CxcuUK9ePQB8fHyKHSPiSlQUImXUuXNnIiMjmT17Nl5eXgQHB7Nx48Yi4zZs2ECXLl0A+J//+R+2b99Oenq6o+OKlBsVhcgNePLJJ9mxYweHDx9mwoQJrFmzhuXLl3Pt2jWuXr3K3Llz2b9/P88++ywAERERBAQEMG7cOI4dO0ZeXh4pKSksWLCArVu3Ovm3ESkbFYXIDahfvz4RERHMnz+fjh07snjxYr788ku6devGQw89xOHDh1m5ciV33HEHAJ6enixbtozmzZvz17/+lfvuu4+hQ4eSkpJCu3btnPvLiJSRSTcuEhGR0miPQkRESqWiEBGRUqkoRESkVCoKEREplctdmX3//fcTGBjo7BgiIi4lKSmJXbt23dR7Xa4oAgMDiYmJcXYMERGXEhkZedPv1aEnEREplYpCRERKpaIQEZFSqShERKRUKgoRESmVikJEREplt6KYMmUKDzzwAP379y/2dcMwmDlzJqGhoYSHh5OQkGCvKCIicgvsdh1FZGQkjz/+OJMmTSr29bi4OBITE9m0aRM//vgj06dP57///a+94oiI2FTSZNolzbFd0tTbJS6nxPElLf/G8tiL3YqiU6dOnDlzpsTXt2zZwsCBAzGZTAQHB5Oamsr58+fx8/OzVyQRh8jLM8jMtZKWmUtWTh7Z1jxyrHlczcjBMCDn18fJqZl4uruRmZvH6cvpeFf3wGoY5OUZWA0Dax7kGQa5VoNTl9O5rbYn1l9fy8szyDMgLTOHi9eyqVuzGnmG8et/+RuqPIM/PDbIy4PESxb869S4Pj4v/3XrH95rzTO4mpGDh5sJdzdTod+xxO1UOW3w7L0Broo63MJ7nXZlttlsJiAgoOBxQEAAZrO52KKIjo4mOjoaoOCm9SKOdOlaFklXMkhKyeCX89c4dzWDjGwryamZ5FrzN6hXM3I4n5ZVLp/n7mbC3WTCzQ083NwwDANLtpWGdWvgZsrfcLu7mTABaVm5ZORYqedVDZPJhJsJ3NzccDOZMJnA7bfnTCZMJhMN69bgakYOgfVq4v7rc7+97ub2u59/7YarGTn4161R
JGP+pxdlKv7pEkaXNr58ll/SG5yVp8TxJb1wo8sp4ZO//PmGFl+I04qiuL8MSlpRUVFRREVFAbd2GbpIaTJzrCRdyeDAmascu3CNA0lXSUrJ4HxaFlczcop9T5P6NcnJNWjTqA5+dapTo5o7lqxcmvvWJs8w8K5RjTo1PPB0d8vfsJtM1K9VjWq/Pq7l6YFXdXequ7vjVd3913K4sQ2GSFl8+e7Nv9dpRREQEEBycnLB4+TkZB12Eoc4n5rJ+gPn+Pn8NU5dSic7N4+DZ6+Snm0tMvaOBl70axNAM99aNK3vRQ1Pd4IC6hBQzF/YIpWV04oiJCSEDz/8kLCwMH788Ue8vb1VFFKurqbn8M3P57l4LZtvj17kQloWOdY8jiSnFRoXUKcGoXf7U6dGNZr71qK5b23aBdalXi1PJyUXqVjsVhTjx49n9+7dpKSk0L17d8aNG0dubi4Aw4cPp0ePHmzdupXQ0FBq1qzJG2+8Ya8oUkWkWLLZefwSXx4yE3vgHFm5eUXGtPKvzeNdbqdfm4YE3+5D7eouN4GyiMPZ7V/J22+/XerrJpOJV155xV4fL1VEXp7B+9uOs+mQmb0n87/oYDLB/c3qc0eDWrS/3YduLX3x866Oh7uuLxW5GfpzSlxOWmYOGw4ks+3oRdb9eLbg+WGdmtC15W30aOWLd41qTkwoUrmoKMQlpGfn8kVCMv/+6ihJVzLIzMmjRjU3Hr4nAD/v6ozvcxd1a6ocROxBRSEVkjk1k53HL7H3ZAoJZ1MLDisBtPSrzSvhbXigRYMiF4KJSPlTUUiFkJljZe3+s+w8cYlj56/x45mrBa+1DvDmyQea0raxD/3uCdAJaBEH0784cZqsXCuf/3iOD749QcLZ1ILnvTzdGdyhMQ/fE0CnZvV1SEnEyVQU4lDXsnJZ9+NZdh6/xNdHzpOamVvw2lPdmzOmRwtdvyBSwagoxCEupGUxM/YQsfHnyM0zqOZuIqS1HxHBgfQK8qO6h7uzI4pICVQUYjcXr2Xxnz2n2Xgwmfhfzznc5e/N6J7NCWvbCE8PXdcg4gpUFFKurHkGa/YlEf39aXYnXgbgnsA6jOrajAHBjWjX2Me5AUXkhqkopFycu5rBu1uO8nn8WdJ+Pe8Q2T6Qv3VvTlDDOk5OJyK3QkUhN+18WiYffneSb36+UHBoqeudt/FopyY8fE8A1TRlhkiloKKQm/Lt0YuM/egHrmbkENzEh7E9WzAguBGtA7T3IFLZqCjkhr2z+Rfmbv6ZujWr8cmYB7ivaX1nRxIRO1JRyA1ZuPUYczf/zL1NfFg04j786+gGPiKVnYpCysQwDOZ/c4w3v/gJkwlWjrqfWppKQ6RK0L90senboxeZsS6Bn83XuLthHd4c2k4lIVKF6F+7lOqn5DQeW7wLgNcH3cOwTrdrxlaRKkZFISU6fTmdIQt24Onhxv+LbEtkh8bOjiQiTqCikGLlWPMYsWQXaZm5fPxUF7o0b+DsSCLiJLoiSor1ytoEEi+lM7xzE5WESBWnopAiTl9OZ+WuU/h5V2dWZDtnxxERJ1NRSBF/W74HgJkD73FyEhGpCFQUUsjibcc5kpxG5zvq06dNgLPjiEgFoKKQApsPmZkZe5jWAd4s+XNHZ8cRkQpCRSEAJF3JYNSvh5zeGdYe7xq6T7WI5FNRCIZh8FRBSQRzV4C3kxOJSEWiohAWxR0n4Wwqj3e5nYjgQGfHEZEKRkUhbDiYDMAr4W2cnEREKiIVRRW34+hF9p++QkRwI92RTkSKpS1DFZaXZ/D6+sMAvNCvtZPTiEhFpaKowp77eB8JZ1Pp1vI2An1qOjuOiFRQKooqas2+JD6PP4enuxtL/9zJ2XFEpAKza1HExcXRt29fQkNDWbRoUZHX09LSGD16NAMGDCAsLIxPPvnEnnHkVxnZVp6P3g/A7qm98NC5CREphd22EFarlVdffZXFixcTGxvL559/ztGjRwuN+eijj2jRogVr165lxYoVzJ49
m+zsbHtFkl+9v+04ALMHt8XHy9PJaUSkorNbUcTHx9O0aVOaNGmCp6cnYWFhbNmypdAYk8mExWLBMAwsFgt169bFw0O3yLCnK+nZvP3lz9Ss5s7Q+5o4O46IuAC7bZXNZjMBAdcnlfP39yc+Pr7QmMcee4wxY8bQrVs3LBYLc+fOxc2taHdFR0cTHR0NQEpKir0iVwn/3PwLABP6tMJNtzQVkTKwW1EYhlHkOZOp8IZp+/btBAUFsXz5ck6dOsVf/vIXOnbsSO3atQuNi4qKIioqCoDIyEh7Ra70MnOsLNuRSP1anozq1tzZcUTERdjt0FNAQADJyckFj81mM35+foXGxMTE0KdPH0wmE02bNqVx48YcP37cXpGqvP+34QgAI7o0dXISEXEldiuKtm3bkpiYyOnTp8nOziY2NpaQkJBCYxo2bMh3330HwMWLFzlx4gSNGze2V6QqLTUzh//7LpHmvrV4vndLZ8cRERdit0NPHh4eTJs2jVGjRmG1Whk8eDAtW7Zk1apVAAwfPpyxY8cyZcoUwsPDMQyDiRMnUr9+fXtFqtJejDmAYeTP5/THQ4AiIqWx61eMevToQY8ePQo9N3z48IKf/f39+eCDD+wZQYBTl9L5PP4cdzTwokcrX2fHEREXoyutqoCpaw4AMCNC98AWkRunoqjkVu89w7ZfLtKmUR3tTYjITVFRVHJvfpH/TafZg9s5OYmIuCoVRSW2+ZAZc2oWD97ZgHsC6zo7joi4KBVFJXU+NZNRv94He9Yg7U2IyM1TUVRChmHwp8W7AJgzpB23N/ByciIRcWUqikro9djDHD1/jQ63+zD0Pl3AKCK3RkVRySzYeozF208Q1LAOq0f/jy6uE5FbVuaiSE9Pt2cOKQcbDpzj/204QjV3E//3106aHVZEyoXNovjhhx945JFHeOSRRwA4cuQI06dPt3cuuUEJZ68y5qMf8K7uwebxPfDzruHsSCJSSdgsilmzZrFkyRJ8fHwAaN26NXv27LF3LrkBaZk5zFh7CIB/DgumaYNaTk4kIpVJmeZ6atiwYaHHxd1cSJzDkpXLiCW72X/6Cs/3bkmvIH9nRxKRSsZmUTRs2JAffvgBk8lEdnY2K1asoEWLFo7IJjYYhsHoD/ey//QVngu5k+d7t3J2JBGphGzuGkyfPp2PPvoIs9lMjx49OHz4MK+88oojskkpzqdm0mduHNt+uUj/dg0Z3+cuZ0cSkUrK5h7FiRMneOuttwo9t3fvXu677z67hZLSWfPy9yR+OX+N50LuZFwv3YhIROzH5h7FzJkzy/ScOIYlK5c/L93ND6euENkhkPF97qKau84ZiYj9lLhHsW/fPvbt28fly5dZunRpwfPXrl3DarU6JJwUdvKShcj5O7hkyWZEl6a8NlD3lxAR+yuxKHJyckhPT8dqtWKxWAqer127Nu+++65Dwsl1Jy9ZiFq4k0uWbF4beA8jujR1diQRqSJKLIrOnTvTuXNnBg0aRGBgoCMzye8YhsHy707yytoEAF6LaKOSEBGHsnkyu2bNmsyePZujR4+SlZVV8Pzy5cvtGkwgL89gckw8/9lzhrv8vZkbFczdjeo4O5aIVDE2z4JOnDiR5s2bc+bMGZ599lkCAwNp27atI7JVaYkXLTzy7jb+s+cMoXf7s25cV5WEiDiFzaK4cuUKQ4cOxcPDg86dOzNr1ix+/PFHR2SrshIvWoh8bwfHL1qY1v9uFo24D08PfbNJRJzD5qEnD4/8IX5+fnzzzTf4+fmRnJxs92BV1Ue7TjL104MALPtLJ3re5efkRCJS1dksijFjxpCWlsakSZN47bXXsFgsvPjii47IVuV8uu8MUz89SAvfWrwzrL3ucy0iFYLNonjooYcA8Pb2ZsWKFUD+ldlSfgzD4IkPdrPtl4sArHqqi6YJF5EKo8SisFqtbNiwAbPZTLdu3WjVqhVff/01CxcuJDMzkzVr1jgwZuWVnp1Lu+mbyM0zuLdxXd4Z1l4l
ISIVSolFMXXqVM6dO0e7du2YOXMmgYGB7Nu3j4kTJ9K7d29HZqy0Dp9LZfSHe8nNM+jfriHvDmuvu9KJSIVTYlEcPHiQtWvX4ubmRlZWFl26dGHTpk34+vo6Ml+ldTUjh4ff2UaNam7Mf6wDj7RtaPtNIiJOUOJ3LqtVq1Zwg6Lq1atzxx13qCTK0V+XfQ/AnCH3qiREpEIrcY/i+PHjhIeHFzw+depUocfr1q2zb7JKbMXOk+w9mYKPVzUG3NvI2XFEREpVYlGsX7/ekTmqjKQrGby85iA1qrmxZXwPZ8cREbGpxKLQRIDl7+j5NELnxgHw4cj7aVC7upMTiYjYZtd5IeLi4ujbty+hoaEsWrSo2DG7du0iIiKCsLAwHn/8cXvGcapDZ1MZOG8HhgHvDm9PxzvqOzuSiEiZ2Lzg7mZZrVZeffVVli5dir+/P0OGDCEkJIQ777yzYExqaiozZsxg8eLFNGrUiEuXLtkrjlP9NsGfyQRvDb1X5yVExKWUaY8iMzOT48eP39CC4+Pjadq0KU2aNMHT05OwsDC2bNlSaMy6desIDQ2lUaP8DWeDBg1u6DNcwaaEZPq9k3+4acHj9zH4vsZOTiQicmNsFsVXX31FREQEo0aNAuDw4cOMHj3a5oLNZjMBAQEFj/39/TGbzYXGJCYmkpqayogRI4iMjKx0V3vP+/ooT63Yi09NT778e3f6tgmw/SYRkQrG5qGnf//736xevZoRI0YAEBQURFJSks0FG4ZR5DmTqfBVx1arlYSEBJYtW0ZmZibDhg3j3nvvpVmzZoXGRUdHEx0dDUBKSorNz3Y2a57BI+9s4ydzGm0D67L8r52pV8vT2bFERG6KzaJwd3fH29v7hhccEBBQaDpys9mMn59fkTH16tXDy8sLLy8vOnbsyJEjR4oURVRUFFFRUQBERkbecBZHi3xvBz+Z0+jcrD4f/62LpuUQEZdm89BTy5YtWbduHVarlcTERF577TXat29vc8Ft27YlMTGR06dPk52dTWxsLCEhIYXG9OrViz179pCbm0tGRgbx8fG0aNHi5n+bCuCz/Un8ePoK1T3ciH5KJSEirs/mHsXLL7/MggUL8PT0ZMKECXTt2pWxY8faXrCHB9OmTWPUqFFYrVYGDx5My5YtWbVqFQDDhw+nRYsWdOvWjQEDBuDm5saQIUNo1arVrf9WTnLpWhb/+/F+AL6e2LPIoTYREVdkMoo7mfA7hw4d4u6773ZUHpsiIyOJiYlxdoxiDV2wg+8TU1g04j766MS1iFQgt7LttLlHMWvWLC5cuEC/fv0ICwujZcuWN/VBld3ibcf5PjGFe5v4qCREpFKxWRQrVqzgwoULbNiwgZdffhmLxcLDDz9cpsNPVcX+01eYGXuY2+t78fHfujg7johIuSrTBXe+vr488cQTzJgxg9atWzN//nx753IZKZZsBs77FoB5f+pATU93JycSESlfNvcojh07xvr16/niiy/w8fHhkUceYfLkyY7I5hKeWrEHgGceakHbxnWdnEZEpPzZLIopU6YQFhbGkiVL8Pf3d0Qml3HykoXvE/MvAJzY5y4npxERsQ+bRfGf//zHETlczvEL14j4d/4hp8VPdNRXYUWk0iqxKP73f/+Xd955p9Bd7X6vKt/h7mpGDiOW7CYtK5f/+2tnerTSLWJFpPIqsSimTp0KwIIFCxwWxhWkZ+fy9Io9JF3J4LWINioJEan0SvzW02/zMq1cuZLAwMBC/61cudJhASuSA2eu0nHmZnYev8zTPZoz4oE7nB1JRMTubH49dseOHUWei4uLs0uYiuxCWhZDFuwgPdvKkic7MuXhIGdHEhFxiBIPPa1cuZJVq1Zx+vTpQucpLBYLHTp0cEi4iiIvz+DPS3eTlZvHv4a3p1eQvv0lIlVHiUURHh5O9+7defvtt5kwYULB87Vq1cLHx8cR2SqMCf/9kYSzqYwPbUW4bmMqIlVMiUVhMplo3Lgx06ZNK/LalStX
qkxZxP18gU/3JfHQXb6MC7nT9htERCqZEotiwoQJLFy4kMjISEwmU6E71plMpiL3v66sXvv8EABvDr1X10qISJVUYlEsXLgQyL9ndlX1yd4z/HL+GoM7NOa22tWdHUdExClsfutp7969pKenA/DZZ58xa9Yszp49a/dgFcGbX/wEwOSHWzs5iYiI89gsiunTp1OzZk2OHDnC4sWLadSoES+88IIjsjnV1z+dJzk1ky7N6+Prrb0JEam6bBaFh4cHJpOJzZs388QTT/Dkk09isVgckc2p3vvmGAAvhVWcu/uJiDiDzaKoVasWCxcuZO3atfTs2ROr1Upubq4jsjnNJ3vPsPvEZYZ3bsI9gZo6XESqNptFMXfuXDw9PXnjjTfw9fXFbDYzcuRIR2Rziv/bkciE//4IwKhuzZ2cRkTE+WwWha+vL+Hh4aSlpfH1119TvXp1Bg4c6IBojmdOzeSVtQkA7JgcQgvf2k5OJCLifDaLYv369QwdOpSNGzeyYcOGgp8rm8wcK396fyeQf0vTRj41nZxIRKRisHnjogULFrB69WoaNGgAwOXLl/nzn/9Mv3797B7OkZZsP8GxCxYGtQ8krF1DZ8cREakwbO5RGIZRUBIAPj4+ha7Srgzifr7Am1/8xJ1+tXn70XudHUdEpEKxuUfRtWtXRo4cSVhYGJB/KKp79+52D+ZIL6yOB2D+Yx00TYeIyB/YLIpJkyaxadMm9u7di2EYREVFERoa6ohsDvGzOY3k1ExCWvvRyt/b2XFERCqcEosiMTGR2bNnc/r0aVq1asWkSZPw969892F4e9PPADyrmWFFRIpV4jmKF198kYceeoh3332XNm3a8Nprrzkyl0Mcv3CNjQnJhLT2o8Pt9ZwdR0SkQipxj8JisfDoo48C0Lx5cwYNGuSwUI7yl2XfA/Byf03TISJSkhKLIisri0OHDhV8wykzM7PQ4zZt2jgmoZ0cu3CNk5fSqV3dg2a31XJ2HBGRCqvEovD19WXWrFkFj2+77baCxyaTieXLl9s/nR3NWn8YgFV/6+LkJCIiFVuJRbFixQpH5nCojGwrmw+fB6BtY036JyJSGpsX3FVGE1fnT/r3996tnJxERKTis2tRxMXF0bdvX0JDQ1m0aFGJ4+Lj4wkKCnLIHFIplmxi488B8MxDLez+eSIirs5uRWG1Wnn11VdZvHgxsbGxfP755xw9erTYcf/4xz/o2rWrvaIU8tjiXQD8a3h7PNyr5A6ViMgNKdNcT5999hn//ve/ATh79izx8fE2FxwfH0/Tpk1p0qQJnp6ehIWFsWXLliLjVqxYQd++fQvNJ2Uvy749waFzqfRrE0D4vY3s/nkiIpVBme6ZvX//fmJjY4H8O97NmDHD5oLNZjMBAQEFj/39/TGbzUXGbN68mWHDhpW6rOjoaCIjI4mMjCQlJcXmZxfHMAymrzuEl6c77wwPvqlliIhURTaLIj4+nldeeYXq1asDULduXXJycmwuuLgZZv844d7rr7/OxIkTcXd3L3VZUVFRxMTEEBMTQ716N3cF9c7jlwEYcG8jqnuU/nkiInKdzUkBPTw8sFqtBRv5y5cv4+Zm+9h+QEAAycnJBY/NZjN+fn6Fxhw8eJDx48cDkJKSwtatW/Hw8KB379439EvYYsnKZdIn+YfLxvbUnE4iIjfCZlGMGDGCZ555hkuXLjF37lw2btzI888/b3PBbdu2JTExkdOnT+Pv709sbCxvvfVWoTFfffVVwc+TJ0+mZ8+e5V4SAM+s/IFTl9N57P7bub2BV7kvX0SkMrNZFAMGDKBNmzbs3LkTwzCYP38+LVrY/lqph4cH06ZNY9SoUVitVgYPHkzLli1ZtWoVAMOHD7/19GWwJ/Ey3/x0gdYB3rw+qK1DPlNEpDIxGTZuV3f27Nlin2/UyDnfGoqMjCQmJqbM45/4YDdxP1/gqwk9aO5b247JREQqrhvddv6ezT2Kp59+uuDnrKwszpw5Q7NmzQq+BVWR
HT6XStzPF+jW8jaVhIjITbJZFOvWrSv0OCEhgejoaLsFKk/Lvk0E4K9dmzk3iIiIC7vhS5PbtGnDgQMH7JGlXKVn5/Lp/iS8q3vw0F1+tt8gIiLFsrlHsXTp0oKf8/LyOHToEPXr17drqPKw/LuTZOfm8XJYkLOjiIi4NJtFYbFYCn52d3enR48e9O3b166hblVensHsjUcAiGgf6OQ0IiKurdSisFqtWCwWJk2a5Kg85eL19YcxDHgu5E7q1Kjm7DgiIi6txHMUubm5uLu7c+jQIUfmuWWGYbBk+wkAxvVq6eQ0IiKur8Q9iqFDh/Lpp58SFBTE6NGj6devH15e169q7tOnj0MC3qi4Xy4CENk+kGqaRlxE5JbZPEdx9epV6tWrx65duwo9X1GLYlNC/vxS4/vo7nUiIuWhxKK4dOkSS5cupWXLlphMpkKzwf5xFtiK5KNdp2jlX5vG9TSnk4hIeSixKPLy8gp948kVfPHr3sR9TSv+13dFRFxFiUXh6+vLs88+68gst2zzofwbIz3fWyexRUTKS4lne23MFVjh5Fjz+O/eM/jXqY5/nRrOjiMiUmmUWBTLli1zYIxb9/H3pwF4urvtKdBFRKTsSiwKHx8fB8a4ddt+vgDAEw80dXISEZHKpVJcaHA+LZNNh8wMuLcRHrp2QkSkXFWKreoH2xMBGKR5nUREyl2lKIoFW48B0KOVr5OTiIhUPi5fFAeTrgIQ1q4hbm4V90JAERFX5fJFMffLnwEY00PfdhIRsQeXLopcax47j18i0Kcm9wTWdXYcEZFKyaWL4qsj57FkW3mu153OjiIiUmm5dFH8dpHdI20bOjmJiEjl5dJF8e3Ri9xe3wtv3cVORMRuXLYosnKtZOXm0fGOes6OIiJSqblsURy/kD8Feks/bycnERGp3Fy2KL46ch6AB+9s4OQkIiKVm8sWxfZfLhLoU5N2jX2cHUVEpFJz2aLYc/IyLf1rOzuGiEil55JFcdmSTY7VwN9bNygSEbE3lyyKj78/BUDnZro3toiIvblkUVzLzAUgsoOmFRcRsTe7FkVcXBx9+/YlNDSURYsWFXl97dq1hIeHEx4ezrBhwzhy5EiZlvvtsUt4ebpjMmm2WBERe7NbUVitVl599VUWL15MbGwsn3/+OUePHi00pnHjxnz44YesW7eOMWPG8PLLL5dt4YZB3Zq6GltExBHsVhTx8fE0bdqUJk2a4OnpSVhYGFu2bCk0pkOHDtStmz/ra3BwMMnJyWVa9qnL6bRrrNliRUQcwcNeCzabzQQEBBQ89vf3Jz4+vsTxq1evpnv37sW+Fh0dTXR0NAApKSmkpOfQtEGt8g0sIiLFsltRGIZR5LmSzins3LmT1atXs3LlymJfj4qKIioqCoDwiIEAVPdwyfPwIiIux25FERAQUOhQktlsxs/Pr8i4I0eO8NJLL/H+++9Tr57tCf6uZVkB6K77Y4uIOITd/ixv27YtiYmJnD59muzsbGJjYwkJCSk05uzZs4wbN445c+bQrFmzMi03Kze/KIIa1in3zCIiUpTd9ig8PDyYNm0ao0aNwmq1MnjwYFq2bMmqVasAGD58OPPmzePKlSvMmDEDAHd3d2JiYkpf8K9HtGpXt1t0ERH5HZNR3MmECqx9977kPjSeAzP6OjuKiIjLiIyMtP2HeAlc7oxwbp5BNZ3IFhFxGJfb4hqGQT0vXWwnIuIoLlcUWbl5BDfR7U9FRBzF5YoCIM+1TquIiLg0lyyKoIa6T7aIiKO4ZFF4uLlkbBERl+SSW9w7bvNydgQRkSrDJYuiZjVdbCci4iguWRSNfHSvbBERR3HJotBNi0REHMcli6Kau0vGFhFxSS65xdWtskVEHMc1iwI1hYiIo7hmUagnREQcxiWLQkREHMcli0J7FCIijuOaRaFzFCIiDuOaRaGeEBFxGNcsCmcHEBGpQlyzKLRLISLiMK5ZFM4OICJShbhmUagpREQcxkWLQk0hIuIoLlkU
IiLiOCoKEREplYpCRERK5XJFobMTIiKO5XJFISIijuV6RaFdChERh3K9olBTiIg4lAsWhYiIOJLLFYX2J0REHMvlikJERBzLrkURFxdH3759CQ0NZdGiRUVeNwyDmTNnEhoaSnh4OAkJCfaMIyIiN8FuRWG1Wnn11VdZvHgxsbGxfP755xw9erTQmLi4OBITE9m0aROvvfYa06dPt1ccERG5SXYrivj4eJo2bUqTJk3w9PQkLCyMLVu2FBqzZcsWBg4ciMlkIjg4mNTUVM6fP1/qcnWOQkTEsTzstWCz2UxAQEDBY39/f+Lj40sdExAQgNlsxs/Pr9C46OhooqOj8wNfSyYyMtJesV1KSkoK9erVc3aMCkHr4jqti+u0Lq47ceLETb/XbkVhGEaR5/44PXhZxgBERUURFRUFQGRkJDExMeWU0rVpXVyndXGd1sV1WhfX3cof2HY79BQQEEBycnLB4+L2FP44Jjk5ucgYERFxLrsVRdu2bUlMTOT06dNkZ2cTGxtLSEhIoTEhISGsWbMGwzDYv38/3t7eKgoRkQrGboeePDw8mDZtGqNGjcJqtTJ48GBatmzJqlWrABg+fDg9evRg69athIaGUrNmTd544w2by/3tEJRoXfye1sV1WhfXaV1cdyvrwmQUd6JARETkV7oyW0RESqWiEBGRUlXYotD0H9fZWhdr164lPDyc8PBwhg0bxpEjR5yQ0jFsrYvfxMfHExQUxMaNGx2YzrHKsi527dpFREQEYWFhPP744w5O6Di21kVaWhqjR49mwIABhIWF8cknnzghpf1NmTKFBx54gP79+xf7+k1vN40KKDc31+jVq5dx6tQpIysrywgPDzd++eWXQmO++eYbY+TIkUZeXp6xb98+Y8iQIU5Ka19lWRd79+41rly5YhhG/nqpyuvit3EjRowwRo0aZWzYsMEJSe2vLOvi6tWrxsMPP2wkJSUZhmEYFy9edEZUuyvLunjvvfeMOXPmGIZhGJcuXTI6depkZGVlOSOuXe3evds4ePCgERYWVuzrN7vdrJB7FPaa/sMVlWVddOjQgbp16wIQHBxc6NqUyqQs6wJgxYoV9O3blwYNGjghpWOUZV2sW7eO0NBQGjVqBFBp10dZ1oXJZMJisWAYBhaLhbp16+LhYbcvfTpNp06dCrYFxbnZ7WaFLIripv8wm82ljvlt+o/Kpizr4vdWr15N9+7dHRHN4cr6/8XmzZsZNmyYo+M5VFnWRWJiIqmpqYwYMYLIyEjWrFnj4JSOUZZ18dhjj3Hs2DG6devGgAEDmDp1Km5uFXLzZ1c3u92skJVqlOP0H67uRn7PnTt3snr1alauXGnvWE5RlnXx+uuvM3HiRNzd3R0VyynKsi6sVisJCQksW7aMzMxMhg0bxr333kuzZs0cFdMhyrIutm/fTlBQEMuXL+fUqVP85S9/oWPHjtSuXdtRMSuEm91uVsii0PQf15VlXQAcOXKEl156iffff7/SToJWlnVx8OBBxo8fD+RPCLd161Y8PDzo3bu3Q7PaW1n/jdSrVw8vLy+8vLzo2LEjR44cqXRFUZZ1ERMTw1NPPYXJZKJp06Y0btyY48eP065dO0fHdaqb3W5WyH0vTf9xXVnWxdmzZxk3bhxz5sypdBuB3yvLuvjqq68K/uvbty+vvPJKpSsJKNu66NWrF3v27CE3N5eMjAzi4+Np0aKFkxLbT1nWRcOGDfnuu+8AuHjxIidOnKBx48bOiOtUN7vdrJB7FPaa/sMVlWVdzJs3jytXrjBjxgwA3N3dK+WMmWVZF1VFWdZFixYtCo7Ju7m5MWTIEFq1auXk5OWvLOti7NixTJkyhfDwcAzDYOLEidSvX9/Jycvf+PHj2b17NykpKXTv3p1x48aRm5sL3Np2U1N4iIhIqSrkoScREak4VBQiIlIqFYWIiJRKRSEiIqVSUYiISKlUFFIhBQUFERERUfDfmTNnShzbvn37W/68yZMnExISQkREBIMG
DWLfvn03vIypU6dy9OhRABYsWFDotfKaUuS39dK/f39Gjx5NampqqeMPHz7M1q1by+WzperS12OlQmrfvn2ZN9Y3MrYkkydPpmfPnvTr14/t27cze/Zs1q1bd9PLK49MtpY7adIk7rjjDsaMGVPi+JiYGA4ePMi0adPKPYtUHdqjEJdgsVh48sknGTRoEOHh4WzevLnImPPnz/PYY48V/MW9Z88eIH+en6ioKAYNGsRzzz2HxWIp9bM6derEqVOnAFi6dCn9+/enf//+LFu2DID09HSeeuopBgwYQP/+/Vm/fj0AI0aM4MCBA/zjH/8gMzOTiIgIJkyYAFzf63n++ecL/YU/efJkvvjiC6xWK7Nnz2bw4MGEh4fz8ccf21wnwcHBBRO6xcfHM2zYMAYOHMiwYcM4fvw42dnZvPvuu6xfv56IiAjWr19Peno6U6ZMYfDgwQwcOLDY9ShSxC1Nfi5iJ61btzYGDBhgDBgwwBg7dqyRk5NjpKWlGYaRfz+B3r17G3l5eYZhGEZwcLBhGIaxZMkSY/78+YZh5N+jIC0tzbh06ZLxpz/9ybBYLIZhGMbChQuNf/3rX0U+b9KkSQX3rli/fr0xZMgQ48CBA0b//v0Ni8ViXLt2zXjkkUeMhIQEY+PGjcbUqVML3puammoYhmE8/vjjRnx8fKFMv/nt8aZNm4wXXnjBMAzDyMrKMrp3725kZGQYH3/8sTFv3ryC5wcNGmScOnWqSM7flpObm2uMGzfO2Lp1q2EYhpGWlmbk5OQYhmEY3377rfHss88ahmEYn3zyiTFjxoyC97/11lvGmjVrDMPIv19Fnz59CtaNSEkq5BQeIjVq1OCzzz4reJyTk8Pbb7/N999/j5ubG2azmYsXL+Lr61swpm3btrz44ovk5ubSu3dvgoKC+Prrrzl69GjB9B45OTkEBwcX+5lz5szhvffeo379+rz++ut899139O7dGy8vLwBCQ0PZs2cP3bp1Y/bs2bz55ps89NBDdOzYscy/V/fu3Zk5cybZ2dnExcXRsWNHatSowbfffstPP/3EF198AeTfke3kyZM0adKk0Pt/21NJSkqiTZs2PPjggwXjJ02axMmTJzGZTOTk5BT7+du3b+err77igw8+ACArK4tz585VyjmgpPyoKMQlrFu3jsuXLxMTE0O1atUICQkhKyur0JhOnTrx4YcfsnXrVl544QVGjhxJnTp1ePDBB3n77bdtfsYLL7xAv379Ch7v2LGj2HHNmjUjJiaGrVu38tZbb/Hggw/y7LPPlun3qF69Op07d2bbtm1s2LCBsLAwIH/655deeolu3bqV+v7fCjQtLY2nn36ajz76iCeeeIJ33nmH+++/n3nz5nHmzBmeeOKJEpfx7rvv0rx58zLlFQGdoxAXkZaWRoMGDahWrRo7d+4kKSmpyJikpCQaNGjAo48+yuDBg0lISCA4OJgffviBkydPApCRkcGJEyfK9JmdOnVi8+bNZGRkkJ6ezubNm+nYsSNms5maNWsSERHByJEjOXToUJH3enh4lPhXfVhYGDExMezZs4euXbsC0LVrV1atWlXwnhMnTpCenl5iNm9vb1566SU++OADcnJySEtLw9/fH4BPP/20YFytWrUKnZPp2rUrH374YcF9CYrLLvJH2qMQlxAeHs6YMWOIjIwkKCio2L+Id+/ezZIlS/Dw8MDLy4vZs2dTv359Zs2axfjx48nOzgbyTyiXZTr2Nm3aEBkZydChQwEYMmQId999N9u2bWPOnDm4ubnh4eHB9OnTi7z30UcfZcCAAdx999289dZbhV578MEHmTRpEiEhIXh6egIwdOhQkpKSiIyMxDAM6tWrx/z580vNd/fdd9O6dWtiY2MZNWoUkydPZunSpXTp0qVgzP3338+iRYuIiIjg6aefZuzYsbzxxhsMGDAAwzAIDAxk4cKFNteFVG36eqyIiJRKh55ERKRUKgoRESmVikJEREqlohARkVKpKEREpFQq
ChERKZWKQkRESvX/Aa/+QEs9g50JAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "****** Crossval split: 4/4 ******\n",
      "\n",
      "Filtering training data\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "be5426abaf5b41ebb51e2567dd73b0a4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Filtered 35%; 32428 remain\n",
      "\n",
      "Filtering evalation data\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ff5aad423e4f4bbab54518bc5f0fd028",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Filtered 53%; 23660 remain\n",
      "\n",
      "Labeling training data\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "78c25d0976854653be92baf65ca71158",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=10000.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Labeling evaluation data\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c445de0805e145249f4647e5552292a2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=5000.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Labeling evaluation OOS data\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c553f188f56e47acafa77fab9cb2b21f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=5000.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']\n",
      "- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "Some weights of BertForTokenClassification were not initialized from the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
      "<ipython-input-15-21e1cede4c54>:45: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='834' max='834' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [834/834 01:35, Epoch 1/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>100</td>\n",
       "      <td>0.663500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>200</td>\n",
       "      <td>0.601800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>300</td>\n",
       "      <td>0.486200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>400</td>\n",
       "      <td>0.340400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>500</td>\n",
       "      <td>0.242700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>600</td>\n",
       "      <td>0.202300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>700</td>\n",
       "      <td>0.153600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>800</td>\n",
       "      <td>0.124400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0e1c475ab2ff4bfa8c65a24d587c8ad0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2ee8ff99342d4741a3f4ec4176b5d746",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "78a1a6af9439481ebe87731bb2d37c95",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "411ed284d33740eca1f0cef18df500a4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "aafdf3014691426c9c6acca3834c45f2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5aa3add5de134f589eaab69087b66549",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7d255e53e1c2408697da1fa08860c9c0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "29b8945f64354ae1b840a1dc316dedbf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "de251d1fba3d4a67893047ee8275d606",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8928cf69ea8746b2bef14028c0c0274a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0c0c4e21626f4ab99ce0696ee9322e0c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9e3499a2376d43bab0086cba34d1b522",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f33d4f879c294c6a8a6455b3692488d5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "38dd78e3ebf44c2bad58f9576a525ab3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b052e8b179584043945b49de9af31676",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e3e11781b4394db1a01454ef37a490f2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "915efb0adfb44c5caa01cf213c3cd56b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ceb10f0f87d044ebab534aefef5ec69c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "31f4bd65079e4983b8a1937901cfbace",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ccb5be44b5494de8862488f82bf01741",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9da6bd7370db44889cab2fb81dcebe11",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "12bddf69336d481fb0076dced187523c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b89b616cd8064d248b37cc642a09b9bf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9346181e5b8b4f1b9a562ca676f87d38",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "de9f0442fc1e43f8bb06e4cecf719d67",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEWCAYAAAB42tAoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAt90lEQVR4nO3de1jUdd7/8efAAIKigHLwFJ5XdC0ztVpPSZIWIooaepd1t/qrdHPvXXNTs0w7ubpb3bVbqWvppaWxGVnmIdNKM0tv08JjqykeUEZRTg7nme/vDxQjhBnNmWHw9bgurouZ+cx33vMpvy8+38PnYzIMw0BERKQaPp4uQEREajcFhYiI1EhBISIiNVJQiIhIjRQUIiJSIwWFiIjUSEEhIiI1Mnu6AJHaLDY2lqysLHx9fQkKCqJPnz48/fTT1K9fH4CdO3fyv//7v+zevRsfHx969OjB5MmTadeuXcU2zp8/z6uvvspnn31Gbm4uTZo04Y477mD8+PGEhYV56quJOE0jChEH5s2bx65du1i5ciX79u1jwYIFAOzatYuxY8dy55138tVXX7Fx40Z+85vfMHr0aI4fPw5ASUkJDz74IIcOHWLhwoV89913vPfee4SEhLB7925Pfi0Rp2lEIeKk8PBwevfuzf79+wH429/+RmJiIg8++GBFmz//+c/s3buXf/zjH8ydO5ePPvqIU6dOsWTJkopRSOPGjfnDH/7gke8gcjU0ohBxUmZmJl999RU33HADhYWF7Nq1i0GDBlVpd/fdd7N161YAtm7dSp8+fSpCQsQbaUQh4sDFv/4LCgq47bbb+OMf/0hubi52u53w8PAq7cPDw8nOzgYgJyeHzp07u7VekWtNIwoRB15//XV27drF0qVLOXz4MNnZ2TRs2BAfHx/OnDlTpf2ZM2cIDQ0FICQk5LJtRLyJgkLEST179iQpKYk5c+YQFBRE165dWbduXZV2a9eu5bbbbgPgd7/7HVu2bKGgoMDd5YpcMwoKkSvw4IMPsnXrVvbv38/jjz/OypUrWbJkCefPnyc3N5dXXnmF77//nsceewyAxMREoqKimDhxIj/99BN2u53s7GzmzZvHpk2bPPxtRJyjoBC5AmFhYSQmJvLGG2/QvXt3Fi5cyGeffUafPn3o378/+/fvZ9myZbRq1QoAf39/Fi9eTJs2bfj973/PLbfcwsiRI8nOzubGG2/07JcRcZJJCxeJiEhNNKIQEZEaKShERKRGCgoREamRgkJERGrkdXdm33rrrTRv3tzTZYiIeJWMjAy2bdt2Ve/1uqBo3rw5qampni5DRMSrJCUlXfV7dehJRERqpKAQEZEaKShERKRGCgoREamRgkJERGqkoBARkRq5LCimTZvG7bffzuDBgy/7umEYPP/888TFxZGQkMDevXtdVYqIiPwKLruPIikpifvvv58pU6Zc9vXNmzeTnp7O+vXr+eGHH5g5cybvv/++q8oREfnVLk62bRhgNwzK7NfH5NsuC4oePXpw4sSJal/fuHEjQ4cOxWQy0bVrV/Ly8jh9+jQRERGuKklELjAMA2uJjcISG6U2OyVldvKKSim1GRSX2cjMLcLHZKLMbmC3l+8QbYaBzWbHZsDxcwU0CvSjxGantMxOcZmd/KJSyuxGxU7UZjewG+WfZTN+9rvdID3LSnjDehiGcaHtpdfsF9raDYOSMjuncosIq++Pj6l8B2387Dtc+r3yY2pqR3mNF5pVNLz4fHXv83bdfsV7PXZntsViISoqquJxVFQUFovlskGRkpJCSkoKQMWi9SJSWZnNzsHT5zl61sqJ7EJOZBdy8HQ+9cy+7D+Vh7/Zh/SzBfj7+lBis1+Tz/TzNeHn64Ofrw8NAswEmH3w8THhYwIfk6n8x+dnv194vllIIOcKSrghLAhfkwmTyYTvxXY+5W19L7QtKrMBEBLkjwkwmcCECSp+58Lvl56D8jaXfv/Z86YL766h3eW2
z8X3XXjebkB+USkRwfWuSV+62mf/ufr3eiwoLrde0sX/0L+UnJxMcnIy8OtuQxfxRoZh8NMZK0eyrJwvLqWo1E52QQl7MnJJzyrg+LkCisvsVXb+vj4mmjTwp7DExm+igjEMuKllCP6+PjQNCaSwpIxmIYEE+vnib/bBZjdoFOhHcD0/Avx8CDD7EBzgh6+vCV+TCV+fSz9mHxNB/r7V/puV2uez167+vR4LiqioKDIzMyseZ2Zm6rCTXPcMw+Do2QK2HMpi/6k81u+zcCa/+LJt/c0++PmYiG5cn9ZN6hMeHEBkw3p0bBpMu/AGtAgN1I5crgmPBUVsbCzvvPMO8fHx/PDDDwQHByso5LpSUmZn36k8thw8w6HT5/nhRC5HsqyV2kQ2DKBfh3B6tWtM15ahNGngTz0/X4LrmQmu5+ehyuV647KgmDRpEtu3byc7O5u+ffsyceJEysrKABg9ejT9+vVj06ZNxMXFERgYyIsvvuiqUkRqheIyGzvSs1nyTTqZuUX8cCK30uv+vj60blKfm1uGcFfnKHq2DiOsvr+HqhW5xGVB8fLLL9f4uslk4plnnnHVx4t4jM1ucOj0ebLOF3Miu4AN+0/z/fGcKoeQRt7SghahQcTf2JS24fV1mEhqLa9bj0KkNskrKuXHzHwseUVs3H+aL348TU5BaZV2TRvVI+GmZtzWJowBMZFENvSOK2VEQEEh4pBhGBSW2sjILuTo2QJ2HM0m7UQOP2bmc9ZaUqltgNmHHq1CuSU6jF7tGtMyNIhmIYH4mzVbjngvBYXIz1iLy9iTkcvOYzl8fSiLw2fOk1tYirXEVqVt2/D6DPptFJ2bNaJDZAM6NWtIkL/+SUndo/+r5bpQarOTmVtERk4hBy352OwGp/OLyS4o4YfjuTSoZ+boWSuWvMrnEQL9fOneKpTftW2Crw+0btKAjlHBtAwL8tA3EXE/BYV4pfyiUk5kF5KRXcipvCKOnbXi5+tDUamdg6fzCfL35fi5QvadysNkqn4KhiYN/Akw+3K+uIxOTRsysHMQN98Qwq2tGxPZsB6+PjrBLKKgkFrh4txDWRf+yj97voSTuYVYi23kFJZw0HKes+eLycgpIuv85W9AA2gQYMbP10SpzeDGFo0YcUsLDANaNQ7CZhjc1DKE5iGBhAT60SjIjwCzrxu/pYh3UlCISxiGQU5BKeeLyzhrLeGctZiTOUUcOn2ec9YS7IbBgcx8CktsnC8uI7ew6pVCvxRg9qFDZDC3RIcQHhxA+4hgmjaqR6sm9WnaqJ5uQBNxEQWFXLVSm50tB7PYdSybk7lFlJSVz0G0/cg5isscTzrXuVlDzD4m2oTXp31EMAAdo4Jp3MCfIH8z9QN8uSEsiIb1/PDRISARj1FQiFPO5BdjySsi/ayVbw+fJe1ELmm/uLMYoFPThvRpH46/2UTLsCDaNmmAv9mHiOAAWjWpT2iQP4H+Otwj4k0UFFJJbkEpWw5lsfNYNmfPF3PWWsJXB7OqtPPzNXF7m8YM69acvu3DCQ8O0IlfkTpKQXEdMwyDM/nFbPrPGX44kcO6PZYqJ4oD/Xy5vU1j6vn5MPTm5jQK9KNbdCgNdT5A5LqhoLiO/HTmPFsOZvHZPguHz5znZG5RpdfbNKnPb5s3ZPCNzejboYnXLMgiIq6loKjDDMNg7Z5MPvo+g/9Lz+bchekmgvx9iWxYj56tw+jSvBEDYiLp1KwhjQI1ShCRqhQUdYzNbrDtyFne33GCD3dlAFDf35fb2zahW3QIfdqFE9M0GLOv5h4SEecoKOqIUpudl9b/h3mbfgLAxwQ3tWjE7W2bMCmugyalE5GrpqDwYnlFpXy4M4ONB07z/bFs8orK8Df78FCvVky4o50OJYnINaGg8EIncwpZsPkwy7cfo7jMjp+viZ6tw7jv1mgGdY7SzWkick0pKLyEzW7wwXcnePvrIxzIzAdgQEwkY26PplfbxjrnICIuo6DwAq9uOMhbWw6TV1S+
5njPVmE8flcHbm3T2MOVicj1QEFRi9nsBrNW7WXJN0cBmDO8CyNuaak7oEXErRQUtVBmbhGvfPYfUnYcByA4wMw3T95JgwD95xIR99Oep5b5/ICF3y/eAUCzRvUY0b0lE+5oSz0/TaQnIp6hoKglSm12xr+zkw37LYQE+fHaqJvp2yHc02WJiCgoaoN1e04x8+N9ZOYVcUt0KK+O6kqLUK3JLCK1g4LCg8psdu6d/w07j+UA8MKw33LfrdGeLUpE5BcUFB5yzlpC7EtfklNQSrcbQpg74ibaRTTwdFkiIlUoKDzg/R3HmZa6mzK7wQO3R/Ns4m89XZKISLUUFG42b9NP/HXtAXx9THww/nfcEh3q6ZJERGqkoHCj5z7Zx1tbjhARHMCKR3/HDY11wlpEaj8FhZtM+vf3pO7MIDTIj88m9dPMriLiNRQUbrB+byapOzMIDjCzffoA/DSBn4h4EQWFCxmGwbvbjvHUyj00aeDPmj/2UUiIiNdxaVBs3ryZF154AbvdzsiRI3n44YcrvZ6fn89f/vIXTp48ic1m4/e//z3Dhw93ZUlu8/WhLF7dcJDt6efw9/VhwQPdiWhYz9NliYhcMZcFhc1m49lnn2XRokVERkYyYsQIYmNjadeuXUWbd999l7Zt2zJv3jzOnTvHoEGDSEhIwN/f31VlucX3x3O4b+E2AP5fn9ZMvTtGM76KiNdyWVCkpaURHR1Ny5YtAYiPj2fjxo2VgsJkMmG1WjEMA6vVSqNGjTCbvfto2LvbjjL9wz0AfDD+dm6JDvNwRSIiv47L9soWi4WoqKiKx5GRkaSlpVVqc9999zF+/Hj69OmD1WrllVdewcen6jH8lJQUUlJSAMjOznZVyb/aoq+PMGvVPgAWP9RDISEidYLLgsIwjCrPmUyVD79s2bKFmJgYlixZwrFjx3jooYfo3r07DRpUnsoiOTmZ5ORkAJKSklxV8q9yJr+4IiS2T7+TiGCdjxCRusFll+BERUWRmZlZ8dhisRAREVGpTWpqKnfddRcmk4no6GhatGjB4cOHXVWSSw1/cysAfxtxo0JCROoUlwVFly5dSE9P5/jx45SUlLB69WpiY2MrtWnatCnffPMNAFlZWRw5coQWLVq4qiSXmb12P8fOFTC8WwtGdm/p6XJERK4plx16MpvNzJgxg3HjxmGz2Rg+fDjt27dn+fLlAIwePZoJEyYwbdo0EhISMAyDyZMnExbmXcf1U3eeYP6mw/ibfXgxSZP7iUjd49JLjPr160e/fv0qPTd69OiK3yMjI3n77bddWYJLXZzgLzTIj/cf/R0BZi1XKiJ1j3dfi+pBf//0R/75xSF8fUxsfqI/wfU0d5OI1E0Kiqvw7Kp9vP31EVqEBrLyD70UEiJSpykortDpvCLe/voIYfX9+WLyHZq7SUTqPO3lrkBRqa1iao45w29USIjIdUF7uiswa9U+Dp4+z//c2Z64TpGeLkdExC0UFE46nV/E8u3H8Df78Oe4Dp4uR0TEbRQUTigps/OX98vnqXrzvm4erkZExL0UFE6Y8O5ONv3nDLEdI4jtGOH4DSIidYjTQVFQUODKOmqt93ccZ8N+CzFNG/LWg92rTGwoIlLXOQyKnTt3cs8993DPPfcAcODAAWbOnOnqumqFF9fs5y8r0mhc35+UR25TSIjIdclhUMyePZu33nqLkJAQADp27MiOHTtcXZfHHTp9ngWby2eyTXnkdhrqpjoRuU45deipadOmld90mcWF6prHlu0EYNFDPWgX0cBBaxGRusvhndlNmzZl586dmEwmSkpKWLp0KW3btnVHbR7z55TvOZCZz6geLen/G528FpHrm8OhwcyZM3n33XexWCz069eP/fv388wzz7ijNo84e76YD3dlADDtnhgPVyMi4nkORxRHjhzhpZdeqvTcd999xy233OKyojxpxLzyhZT+9UB3GgXqvISIiMMRxfPPP+/Uc3XBrmPZHMmyknBTM03RISJyQbUj
il27drFr1y7OnTvHokWLKp4/f/48NpvNLcW5k7W4jFELvgXgiYG/8XA1IiK1R7VBUVpaSkFBATabDavVWvF8gwYNeO2119xSnDv9KeV7isvs/DWpCy3DgjxdjohIrVFtUPTs2ZOePXsybNgwmjdv7s6a3M5uN/jyx9MEB5gZ1fMGT5cjIlKrODyZHRgYyJw5czh06BDFxcUVzy9ZssSlhbnT0m+PUmoz+EP/1p4uRUSk1nF4Mnvy5Mm0adOGEydO8Nhjj9G8eXO6dOnijtrcwlpcxt8//RGACXe083A1IiK1j8OgyMnJYeTIkZjNZnr27Mns2bP54Ycf3FGbW7y4Zj/5xWW8knwT/ua6f8e5iMiVcnjoyWwubxIREcGXX35JREQEmZmZLi/MXdJO5AIwtGvdPg8jInK1HAbF+PHjyc/PZ8qUKTz33HNYrVaefPJJd9Tmcpa8InZn5PJft96gmWFFRKrhMCj69+8PQHBwMEuXLgXK78z2doZh8PTKPQDc/dsoD1cjIlJ7VRsUNpuNtWvXYrFY6NOnDx06dOCLL75g/vz5FBUVsXLlSjeWee19knaK9fssNKxnpne7Jp4uR0Sk1qo2KKZPn86pU6e48cYbef7552nevDm7du1i8uTJDBgwwJ01usTFtSbW/amvDjuJiNSg2qDYs2cPH3/8MT4+PhQXF3Pbbbexfv16wsPD3VmfS5SU2dmdkUvj+v40Cwn0dDkiIrVatdeD+vn5VSxQFBAQQKtWrepESAD84/ODADzct42HKxERqf2qHVEcPnyYhISEisfHjh2r9HjVqlWurcxFSm12/vH5IQDG9VFQiIg4Um1QrFmzxp11uM2HO8sXJRrUOQpfH52bEBFxpNqgqIsTAdrtBv/6qvwk9pwRN3q4GhER7+DSOSs2b97MwIEDiYuLY8GCBZdts23bNhITE4mPj+f+++93ZTm8s+0oB0+fJ/7Gplq9TkTESQ5vuLtaNpuNZ599lkWLFhEZGcmIESOIjY2lXbtLE+/l5eUxa9YsFi5cSLNmzTh79qyryqHUZmf+pvLRxMv33uSyzxERqWucGlEUFRVx+PDhK9pwWloa0dHRtGzZEn9/f+Lj49m4cWOlNqtWrSIuLo5mzZoB0Lhx4yv6jCsx5q1tZOQUMvmuDgSYfV32OSIidY3DoPj8889JTExk3LhxAOzfv59HH33U4YYtFgtRUZemxoiMjMRisVRqk56eTl5eHmPGjCEpKclld3sfOp3Pt4fP0bRRPf7QX1OJi4hcCYeHnv75z3+yYsUKxowZA0BMTAwZGRkON2wYRpXnfnkHtM1mY+/evSxevJiioiJGjRrFTTfdROvWlRcQSklJISUlBYDs7GyHn/1Lz32yH4DX7+umu7BFRK6Qw6Dw9fUlODj4ijccFRVVaTpyi8VCRERElTahoaEEBQURFBRE9+7dOXDgQJWgSE5OJjk5GYCkpKQrqsMwDL4+lEXHqGC63RB6xd9DROR65/DQU/v27Vm1ahU2m4309HSee+45br75Zocb7tKlC+np6Rw/fpySkhJWr15NbGxspTZ33nknO3bsoKysjMLCQtLS0mjbtu3Vf5vLWLw1nTK7wbCb697lviIi7uBwRPH0008zb948/P39efzxx+nduzcTJkxwvGGzmRkzZjBu3DhsNhvDhw+nffv2LF++HIDRo0fTtm1b+vTpw5AhQ/Dx8WHEiBF06NDh13+rCwzDYNaqfQCM7a31sEVErobJuNzJhJ/Zt28fnTp1clc9DiUlJZGamupU283/OcMDb2+nR6tQ3n/0dy6uTESk9rqSfecvORxRzJ49mzNnzjBo0CDi4+Np3779VX2QJ8zb9BMAC8Z093AlIiLey2FQLF26lDNnzrB27VqefvpprFYrd999t1OHnzxp609ZbP3pLENuakZofX9PlyMi4rWcuuEuPDycBx54gFmzZtGxY0feeOMNV9f1q735Zflo4s9x1+6ch4jI9cjhiOKnn35i
zZo1fPrpp4SEhHDPPfcwdepUd9R21X7MzOerg1n0bB1G6yb1PV2OiIhXcxgU06ZNIz4+nrfeeovIyEh31PSrLd56BIDnh/7Ww5WIiHg/h0Hx73//2x11XFPbj5zjphaN6BB55TcKiohIZdUGxf/8z//w6quvVlrV7udq6wp3J7IL+OmMlYd6tfJ0KSIidUK1QTF9+nQA5s2b57ZiroVXN5Svhz28WwsPVyIiUjdUe9XTxXmZli1bRvPmzSv9LFu2zG0FXql9p/IwmeC3zRt5uhQRkTrB4eWxW7durfLc5s2bXVLMtbD3ZB6/0bkJEZFrptpDT8uWLWP58uUcP3680nkKq9VKt27d3FLclSooKQOgXUQDD1ciIlJ3VBsUCQkJ9O3bl5dffpnHH3+84vn69esTEhLijtqu2PYj5wDoHq3pxEVErpVqg8JkMtGiRQtmzJhR5bWcnJxaGRZvbSm/f2JIV00pLiJyrVQbFI8//jjz588nKSkJk8lUacU6k8lUZf1rT1u3J5OvDmZxV6dIwjS3k4jINVNtUMyfPx8oXzPbGyzYXD63k+7GFhG5thxe9fTdd99RUFAAwEcffcTs2bM5efKkywu7UjuP5WAyQUTDep4uRUSkTnEYFDNnziQwMJADBw6wcOFCmjVrxhNPPOGO2px2Or8IgLs6ecdcVCIi3sRhUJjNZkwmExs2bOCBBx7gwQcfxGq1uqM2p63fawHgni5NPVyJiEjd4zAo6tevz/z58/n444+54447sNlslJWVuaM2p5Ta7MxZewCAATEaUYiIXGsOg+KVV17B39+fF198kfDwcCwWC2PHjnVHbU754sBp8ovLeLhvG+oHOJwMV0RErpDDoAgPDychIYH8/Hy++OILAgICGDp0qBtKc873x3MAuP/WaM8WIiJSRzkMijVr1jBy5EjWrVvH2rVrK36vLb46mEU9Px9ahgV6uhQRkTrJ4bGaefPmsWLFCho3bgzAuXPn+O///m8GDRrk8uKcsTsjlz7tm2AymTxdiohIneRwRGEYRkVIAISEhFS6S9uTcgtKAbQutoiICzkcUfTu3ZuxY8cSHx8PlB+K6tu3r8sLc0ZGTiEA7TVbrIiIyzgMiilTprB+/Xq+++47DMMgOTmZuLg4d9Tm0H8s+QC0bqKgEBFxlWqDIj09nTlz5nD8+HE6dOjAlClTiIysXfcpfPHjaQC6aDU7ERGXqfYcxZNPPkn//v157bXX6Ny5M88995w763LK+r0W6vv70ijIz9OliIjUWdWOKKxWK/feey8Abdq0YdiwYW4ryhnW4jIKS230bB3m6VJEROq0aoOiuLiYffv2VVzhVFRUVOlx586d3VNhNbILSgBI7NrMo3WIiNR11QZFeHg4s2fPrnjcpEmTiscmk4klS5a4vroa7DyWA0A9s69H6xARqeuqDYqlS5e6s44rticjF4De7Zt4uBIRkbrN4Q13tVVhiQ2ASC1UJCLiUi4Nis2bNzNw4EDi4uJYsGBBte3S0tKIiYm5ojmkjp4rIMhfh51ERFzNZUFhs9l49tlnWbhwIatXr+aTTz7h0KFDl23397//nd69e1/R9guKyzR1h4iIGzg119NHH33EP//5TwBOnjxJWlqaww2npaURHR1Ny5Yt8ff3Jz4+no0bN1Zpt3TpUgYOHFhpPilnHMmyUs9PIwoREVdzas3s77//ntWrVwPlK97NmjXL4YYtFgtRUVEVjyMjI7FYLFXabNiwgVGjRtW4rZSUFJKSkkhKSiI7OxuAs9YSGgXqRjsREVdzGBRpaWk888wzBAQEANCoUSNKS0sdbvhyM8z+cirwF154gcmTJ+PrW/PIIDk5mdTUVFJTUwkNDaXMZgegRajWoBARcTWHkwKazWZsNlvFTv7cuXP4+Dg+tREVFUVmZmbFY4vFQkRERKU2e/bsYdKkSQBkZ2ezadMmzGYzAwYMqHHbmXlFAITV93dYh4iI/DoOg2LMmDH84Q9/4OzZs7zy
yiusW7eOP/3pTw433KVLF9LT0zl+/DiRkZGsXr2al156qVKbzz//vOL3qVOncscddzgMCYBjZwsAaNVYJ7NFRFzNYVAMGTKEzp078+2332IYBm+88QZt27Z1vGGzmRkzZjBu3DhsNhvDhw+nffv2LF++HIDRo0dfddHZFxYsuqFx0FVvQ0REnOMwKE6ePElgYCD9+/ev9FyzZo7nWOrXrx/9+vWr9Fx1AfHXv/7V4fYu+vHCOhQ6mS0i4noOg+KRRx6p+L24uJgTJ07QunXriqugPGHroSwAWoZqRCEi4moOg2LVqlWVHu/du5eUlBSXFeSM/1jyaRBgxt/stTOQiIh4jSve03bu3Jndu3e7ohan5RWV0UeTAYqIuIXDEcWiRYsqfrfb7ezbt4+wMM8tFnTx9gydnxARcQ+HQWG1Wit+9/X1pV+/fgwcONClRdWkuKz8Zrtu0aEeq0FE5HpSY1DYbDasVitTpkxxVz0O2S8MKSKCAzxciYjI9aHacxRlZWX4+vqyb98+d9bjUMmF6Tv8fXUiW0TEHaodUYwcOZIPP/yQmJgYHn30UQYNGkRQ0KXLUe+66y63FPhLNlv5iKJlmC6NFRFxB4fnKHJzcwkNDWXbtm2VnvdUUBSWlq9spwkBRUTco9qgOHv2LIsWLaJ9+/aYTKZKs8H+chZYd7p46MmTNYiIXE+qDQq73V7piqfawm4YRDbUiWwREXepNijCw8N57LHH3FmLcwzoEBns6SpERK4b1V46dLmFh2qDEpud4HoOT62IiMg1Um1QLF682I1lXAEDsq2OV9gTEZFro9qgCAkJcWMZzjOAmKYNPV2GiMh1w+vuWrMbBgF+Xle2iIjX8so9bnGp3dMliIhcN7wyKJrrZjsREbfxyqDw1b12IiJu451B4aOkEBFxFy8NCq8sW0TEK3nlHlczjIuIuI9X7nJ9NCGgiIjbeGVQFF2YalxERFzPK4OiaSNdHisi4i5eGRS+uj5WRMRtvDIozLo8VkTEbbwyKHQfhYiI+3hlUJh1H4WIiNt45R7XXksXVRIRqYu8MijC6vt7ugQRkeuGVwaFbrgTEXEflwbF5s2bGThwIHFxcSxYsKDK6x9//DEJCQkkJCQwatQoDhw44NR2dTJbRMR9XBYUNpuNZ599loULF7J69Wo++eQTDh06VKlNixYteOedd1i1ahXjx4/n6aefdmrbvhpRiIi4jcuCIi0tjejoaFq2bIm/vz/x8fFs3LixUptu3brRqFEjALp27UpmZqZT29ZFTyIi7mN21YYtFgtRUVEVjyMjI0lLS6u2/YoVK+jbt+9lX0tJSSElJaXisQ49iYi4j8uCwrjMJaymag4Zffvtt6xYsYJly5Zd9vXk5GSSk5MBaH/rnTr0JCLiRi4LiqioqEqHkiwWCxEREVXaHThwgKeeeop//etfhIaGOrXtev6+16xOERGpmcuO9nfp0oX09HSOHz9OSUkJq1evJjY2tlKbkydPMnHiRObOnUvr1q2d3rafTlKIiLiNy0YUZrOZGTNmMG7cOGw2G8OHD6d9+/YsX74cgNGjR/P666+Tk5PDrFmzAPD19SU1NdXhtnXkSUTEfUzG5U4m1GLtb72T3VvWU89Ph59ERJyVlJTk1B/il6NjOCIiUiMFhYiI1Mgrg0LnKERE3Mc7gwIlhYiIu3hlUIiIiPt4ZVDo0JOIiPt4ZVCIiIj7eGVQaEAhIuI+3hkUOvYkIuI2XhkUIiLiPl4ZFBpPiIi4j1cGhYiIuI9XBoVOUYiIuI+XBoWSQkTEXbwyKERExH0UFCIiUiMFhYiI1EhBISIiNVJQiIhIjRQUIiJSI68LCl0YKyLiXl4XFCIi4l4KChERqZH3BYWOPYmIuJX3BYWIiLiVFwaFhhQiIu7khUEhIiLupKAQEZEaeV1Q6MCTiIh7eV1QiIiIeykoRESkRgoKERGpkYJCRERq5NKg2Lx5MwMHDiQuLo4FCxZUed0wDJ5//nni4uJI
SEhg7969rixHRESugsuCwmaz8eyzz7Jw4UJWr17NJ598wqFDhyq12bx5M+np6axfv57nnnuOmTNnuqocERG5Si4LirS0NKKjo2nZsiX+/v7Ex8ezcePGSm02btzI0KFDMZlMdO3alby8PE6fPl3jdnV5rIiIe5ldtWGLxUJUVFTF48jISNLS0mpsExUVhcViISIiolK7lJQUUlJSygs+n0lSUpKryvYq2dnZhIaGerqMWkF9cYn64hL1xSVHjhy56ve6LCgMw6jynMlkuuI2AMnJySQnJwOQlJREamrqNarSu6kvLlFfXKK+uER9ccmv+QPbZYeeoqKiyMzMrHh8uZHCL9tkZmZWaSMiIp7lsqDo0qUL6enpHD9+nJKSElavXk1sbGylNrGxsaxcuRLDMPj+++8JDg5WUIiI1DIuO/RkNpuZMWMG48aNw2azMXz4cNq3b8/y5csBGD16NP369WPTpk3ExcURGBjIiy++6HC7Fw9Bifri59QXl6gvLlFfXPJr+sJkXO5EgYiIyAW6M1tERGqkoBARkRrV2qDQ9B+XOOqLjz/+mISEBBISEhg1ahQHDhzwQJXu4agvLkpLSyMmJoZ169a5sTr3cqYvtm3bRmJiIvHx8dx///1urtB9HPVFfn4+jz76KEOGDCE+Pp4PPvjAA1W63rRp07j99tsZPHjwZV+/6v2mUQuVlZUZd955p3Hs2DGjuLjYSEhIMA4ePFipzZdffmmMHTvWsNvtxq5du4wRI0Z4qFrXcqYvvvvuOyMnJ8cwjPJ+uZ774mK7MWPGGOPGjTPWrl3rgUpdz5m+yM3NNe6++24jIyPDMAzDyMrK8kSpLudMX7z55pvG3LlzDcMwjLNnzxo9evQwiouLPVGuS23fvt3Ys2ePER8ff9nXr3a/WStHFK6a/sMbOdMX3bp1o1GjRgB07dq10r0pdYkzfQGwdOlSBg4cSOPGjT1QpXs40xerVq0iLi6OZs2aAdTZ/nCmL0wmE1arFcMwsFqtNGrUCLPZZRd9ekyPHj0q9gWXc7X7zVoZFJeb/sNisdTY5uL0H3WNM33xcytWrKBv377uKM3tnP3/YsOGDYwaNcrd5bmVM32Rnp5OXl4eY8aMISkpiZUrV7q5Svdwpi/uu+8+fvrpJ/r06cOQIUOYPn06Pj61cvfnUle736yVkWpcw+k/vN2VfM9vv/2WFStWsGzZMleX5RHO9MULL7zA5MmT8fX1dVdZHuFMX9hsNvbu3cvixYspKipi1KhR3HTTTbRu3dpdZbqFM32xZcsWYmJiWLJkCceOHeOhhx6ie/fuNGjQwF1l1gpXu9+slUGh6T8ucaYvAA4cOMBTTz3Fv/71rzo7CZozfbFnzx4mTZoElE8It2nTJsxmMwMGDHBrra7m7L+R0NBQgoKCCAoKonv37hw4cKDOBYUzfZGamsrDDz+MyWQiOjqaFi1acPjwYW688UZ3l+tRV7vfrJVjL03/cYkzfXHy5EkmTpzI3Llz69xO4Oec6YvPP/+84mfgwIE888wzdS4kwLm+uPPOO9mxYwdlZWUUFhaSlpZG27ZtPVSx6zjTF02bNuWbb74BICsriyNHjtCiRQtPlOtRV7vfrJUjCldN/+GNnOmL119/nZycHGbNmgWAr69vnZwx05m+uF440xdt27atOCbv4+PDiBEj6NChg4crv/ac6YsJEyYwbdo0EhISMAyDyZMnExYW5uHKr71Jkyaxfft2srOz6du3LxMnTqSsrAz4dftNTeEhIiI1qpWHnkREpPZQUIiISI0UFCIiUiMFhYiI1EhBISIiNVJQSK0UExNDYmJixc+JEyeqbXvzzTf/6s+bOnUqsbGxJCYmMmzYMHbt2nXF25g+fTqHDh0CYN68eZVeu1ZTilzsl8GDB/Poo4+Sl5dXY/v9+/ezadOma/LZcv3S5bFSK918881O76yvpG11pk6dyh133MGgQYPYsmULc+bMYdWqVVe9vWtRk6PtTpkyhVat
WjF+/Phq26emprJnzx5mzJhxzWuR64dGFOIVrFYrDz74IMOGDSMhIYENGzZUaXP69Gnuu+++ir+4d+zYAZTP85OcnMywYcP44x//iNVqrfGzevTowbFjxwBYtGgRgwcPZvDgwSxevBiAgoICHn74YYYMGcLgwYNZs2YNAGPGjGH37t38/e9/p6ioiMTERB5//HHg0qjnT3/6U6W/8KdOncqnn36KzWZjzpw5DB8+nISEBN577z2HfdK1a9eKCd3S0tIYNWoUQ4cOZdSoURw+fJiSkhJee+011qxZQ2JiImvWrKGgoIBp06YxfPhwhg4detl+FKniV01+LuIiHTt2NIYMGWIMGTLEmDBhglFaWmrk5+cbhlG+nsCAAQMMu91uGIZhdO3a1TAMw3jrrbeMN954wzCM8jUK8vPzjbNnzxr/9V//ZVitVsMwDGP+/PnGP/7xjyqfN2XKlIq1K9asWWOMGDHC2L17tzF48GDDarUa58+fN+655x5j7969xrp164zp06dXvDcvL88wDMO4//77jbS0tEo1XXTx8fr1640nnnjCMAzDKC4uNvr27WsUFhYa7733nvH6669XPD9s2DDj2LFjVeq8uJ2ysjJj4sSJxqZNmwzDMIz8/HyjtLTUMAzD+Prrr43HHnvMMAzD+OCDD4xZs2ZVvP+ll14yVq5caRhG+XoVd911V0XfiFSnVk7hIVKvXj0++uijiselpaW8/PLL/N///R8+Pj5YLBaysrIIDw+vaNOlSxeefPJJysrKGDBgADExMXzxxRccOnSoYnqP0tJSunbtetnPnDt3Lm+++SZhYWG88MILfPPNNwwYMICgoCAA4uLi2LFjB3369GHOnDn87W9/o3///nTv3t3p79W3b1+ef/55SkpK2Lx5M927d6devXp8/fXX/Pjjj3z66adA+YpsR48epWXLlpXef3GkkpGRQefOnenVq1dF+ylTpnD06FFMJhOlpaWX/fwtW7bw+eef8/bbbwNQXFzMqVOn6uQcUHLtKCjEK6xatYpz586RmpqKn58fsbGxFBcXV2rTo0cP3nnnHTZt2sQTTzzB2LFjadiwIb169eLll192+BlPPPEEgwYNqni8devWy7Zr3bo1qampbNq0iZdeeolevXrx2GOPOfU9AgIC6NmzJ1999RVr164lPj4eKJ/++amnnqJPnz41vv9igObn5/PII4/w7rvv8sADD/Dqq69y66238vrrr3PixAkeeOCBarfx2muv0aZNG6fqFQGdoxAvkZ+fT+PGjfHz8+Pbb78lIyOjSpuMjAwaN27Mvffey/Dhw9m7dy9du3Zl586dHD16FIDCwkKOHDni1Gf26NGDDRs2UFhYSEFBARs2bKB79+5YLBYCAwNJTExk7Nix7Nu3r8p7zWZztX/Vx8fHk5qayo4dO+jduzcAvXv3Zvny5RXvOXLkCAUFBdXWFhwczFNPPcXbb79NaWkp+fn5REZGAvDhhx9WtKtfv36lczK9e/fmnXfeqViX4HK1i/ySRhTiFRISEhg/fjxJSUnExMRc9i/i7du389Zbb2E2mwkKCmLOnDmEhYUxe/ZsJk2aRElJCVB+QtmZ6dg7d+5MUlISI0eOBGDEiBF06tSJr776irlz5+Lj44PZbGbmzJlV3nvvvfcyZMgQOnXqxEsvvVTptV69ejFlyhRiY2Px9/cHYOTIkWRkZJCUlIRhGISGhvLGG2/UWF+nTp3o2LEjq1evZty4cUydOpVFixZx2223VbS59dZbWbBgAYmJiTzyyCNMmDCBF198kSFDhmAYBs2bN2f+/PkO+0Kub7o8VkREaqRDTyIiUiMFhYiI1EhBISIiNVJQiIhIjRQUIiJSIwWFiIjUSEEhIiI1+v+zGiMYDpa1vQAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "[0.24272061700106187, 0.1890124629743475, 0.1665455764824233, 0.212820656122506, 0.18890068741966132]\n"
     ]
    }
   ],
   "source": [
    "# cross-validate gene classifier\n",
    "all_roc_auc, roc_auc, roc_auc_sd, mean_fpr, mean_tpr, confusion, label_dicts \\\n",
    "    = cross_validate(subsampled_train_dataset, targets, labels, nsplits, subsample_size, training_args, freeze_layers, training_output_dir, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# collect the cross-validation ROC summary of every model to plot (a single entry here)\n",
    "# NOTE(review): the last two tuple items are presumably the legend label and line color -- confirm against plot_ROC\n",
    "bundled_data = [\n",
    "    (roc_auc, roc_auc_sd, mean_fpr, mean_tpr, \"Geneformer\", \"red\"),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# draw the ROC curve(s) for everything collected in bundled_data\n",
    "# NOTE(review): the second argument is presumably the figure title -- confirm against plot_ROC\n",
    "plot_ROC(\n",
    "    bundled_data,\n",
    "    \"Dosage Sensitive vs Insensitive TFs\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# render the confusion matrix returned by cross_validate for the Geneformer classifier\n",
    "# NOTE(review): the order of classes_list presumably has to match the label encoding\n",
    "# used when the confusion matrix was built -- confirm before interpreting the plot\n",
    "classes_list = [\n",
    "    \"Dosage Sensitive\",\n",
    "    \"Dosage Insensitive\",\n",
    "]\n",
    "plot_confusion_matrix(classes_list, confusion, \"Geneformer\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  },
  "vscode": {
   "interpreter": {
    "hash": "eba1599a1f7e611c14c87ccff6793920aa63510b01fc0e229d6dd014149b8829"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
